Commit | Line | Data |
---|---|---|
75e28f62 E |
1 | /* |
2 | * Copyright (C) 2011 Gilead Kutnick "Exophase" <exophase@gmail.com> | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or | |
5 | * modify it under the terms of the GNU General Public License as | |
6 | * published by the Free Software Foundation; either version 2 of | |
7 | * the License, or (at your option) any later version. | |
8 | * | |
9 | * This program is distributed in the hope that it will be useful, | |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | * General Public License for more details. | |
13 | */ | |
14 | ||
15 | #include <stdio.h> | |
16 | #include <stdlib.h> | |
17 | #include <malloc.h> | |
18 | #include <math.h> | |
19 | ||
20 | #include "common.h" | |
21 | ||
// Storage type for fixed-point values; the fixed-point macros in this file
// treat the low FIXED_BITS bits as the fraction.
typedef s32 fixed_type;
23 | ||
// Number of fractional bits carried by span edge positions (s64 values).
#define EDGE_STEP_BITS 32
// Number of fractional bits carried by fixed_type values.
#define FIXED_BITS 12

// Converts an integer expression to fixed point and adds one half, i.e. the
// value sampled at the center of the integer cell. The argument is
// parenthesized so that compound expressions are cast as a whole.
#define fixed_center(value)                                                   \
  ((((fixed_type)(value)) << FIXED_BITS) + (1 << (FIXED_BITS - 1)))

// Converts an integer expression to fixed point.
#define int_to_fixed(value)                                                   \
  (((fixed_type)(value)) << FIXED_BITS)

// Drops the fractional bits of a fixed-point value (arithmetic shift).
#define fixed_to_int(value)                                                   \
  ((value) >> FIXED_BITS)

// Multiplies two fixed-point values using a 64-bit intermediate product.
#define fixed_mul(_a, _b)                                                     \
  (((s64)(_a) * (_b)) >> FIXED_BITS)

// Converts a fixed-point value to double.
#define fixed_to_double(value)                                                \
  ((value) / (double)(1 << FIXED_BITS))

// Converts a double to fixed point (truncating conversion).
#define double_to_fixed(value)                                                \
  ((fixed_type)((value) * (double)(1 << FIXED_BITS)))
44 | ||
// Interpolation state for one vertex attribute (_span_struct holds one of
// these each for u, v, r, g and b).
typedef struct
{
  // Fixed-point value of the attribute at the span's base x on the current
  // scanline.
  fixed_type current_value;
  // Change in value per one-pixel step in x (added per pixel by render_span).
  fixed_type step_dx;
  // Change in value per one-scanline step in y (applied by increment_span and
  // decrement_span).
  fixed_type step_dy;
  // Unscaled gradient terms produced by triangle_signed_area_x2; they are
  // multiplied by the reciprocal of the triangle area to obtain step_dx and
  // step_dy.
  fixed_type gradient_area_x;
  fixed_type gradient_area_y;
} interpolant_struct;
53 | ||
// State used to walk a triangle one scanline at a time.
typedef struct
{
  // Integer x position that the interpolants' current_value refers to; set
  // from left_x whenever the interpolant bases are (re)initialized.
  s32 base_x;

  // Left edge position and its per-scanline step, carrying EDGE_STEP_BITS
  // fractional bits.
  s64 left_x;
  s64 left_dx_dy;

  // Right edge position and its per-scanline step, same format as the left
  // edge.
  s64 right_x;
  s64 right_dx_dy;

  // triangle_area is the value handed to fixed_reciprocal when computing the
  // interpolant steps; triangle_winding is passed along as the reciprocal's
  // sign.
  u32 triangle_area;
  u32 triangle_winding;

  // Texture coordinate (u, v) and color (r, g, b) interpolants.
  interpolant_struct u;
  interpolant_struct v;
  interpolant_struct r;
  interpolant_struct g;
  interpolant_struct b;
} _span_struct;
73 | ||
74 | ||
// Rendering statistics counters. They are only ever incremented by the
// rendering code in this file; nothing visible here resets them.

// Pixels covered by clipped spans (render_span) and line pixels drawn.
u32 span_pixels = 0;
// Count of 8-pixel-aligned blocks touched by clipped spans.
u32 span_pixel_blocks = 0;
// Spans that passed the viewport rejection tests.
u32 spans = 0;
// Triangles submitted to render_triangle (counted before any rejection other
// than nothing at all; zero-area triangles are counted too).
u32 triangles = 0;

// Texel fetches, per texture mode.
u32 texels_4bpp = 0;
u32 texels_8bpp = 0;
u32 texels_16bpp = 0;
// Span pixels rendered without texture mapping.
u32 untextured_pixels = 0;
// Pixels that went through the blend path in draw_pixel.
u32 blend_pixels = 0;
// Span pixels skipped because the fetched texel was 0.
u32 transparent_pixels = 0;

// NOTE(review): the three counters below are not modified anywhere in the
// visible part of this file; presumably they are updated elsewhere.
u32 state_changes = 0;
u32 render_buffer_flushes = 0;
u32 trivial_rejects = 0;
90 | ||
// Intentionally empty: this renderer draws pixels directly, so there is no
// block buffer to flush. NOTE(review): presumably kept so callers written
// against a buffered renderer still link - confirm against callers.
void flush_render_block_buffer(psx_gpu_struct *psx_gpu)
{

}
95 | ||
96 | ||
97 | u32 fixed_reciprocal(u32 denominator, u32 *_shift) | |
98 | { | |
99 | u32 shift = __builtin_clz(denominator); | |
100 | u32 denominator_normalized = denominator << shift; | |
101 | ||
102 | // Implement with a DP divide | |
103 | u32 reciprocal = | |
104 | (double)((1ULL << 62) + (denominator_normalized - 1)) / | |
105 | (double)denominator_normalized; | |
106 | ||
107 | *_shift = 62 - shift; | |
108 | return reciprocal; | |
109 | } | |
110 | ||
111 | fixed_type fixed_reciprocal_multiply(s32 numerator, u32 reciprocal, | |
112 | u32 reciprocal_sign, u32 shift) | |
113 | { | |
114 | u32 numerator_sign = (u32)numerator >> 31; | |
115 | u32 flip_sign = numerator_sign ^ reciprocal_sign; | |
116 | u32 flip_sign_mask = ~(flip_sign - 1); | |
117 | fixed_type value; | |
118 | ||
119 | numerator = abs(numerator); | |
120 | ||
121 | value = ((u64)numerator * reciprocal) >> shift; | |
122 | ||
123 | value ^= flip_sign_mask; | |
124 | value -= flip_sign_mask; | |
125 | ||
126 | return value; | |
127 | } | |
128 | ||
129 | s32 triangle_signed_area_x2(s32 x0, s32 y0, s32 x1, s32 y1, s32 x2, s32 y2) | |
130 | { | |
131 | return ((x1 - x0) * (y2 - y1)) - ((x2 - x1) * (y1 - y0)); | |
132 | } | |
133 | ||
134 | u32 fetch_texel_4bpp(psx_gpu_struct *psx_gpu, u32 u, u32 v) | |
135 | { | |
136 | u8 *texture_ptr_8bpp = psx_gpu->texture_page_ptr; | |
137 | u32 texel = texture_ptr_8bpp[(v * 2048) + (u / 2)]; | |
138 | ||
139 | if(u & 1) | |
140 | texel >>= 4; | |
141 | else | |
142 | texel &= 0xF; | |
143 | ||
144 | texels_4bpp++; | |
145 | ||
146 | return psx_gpu->clut_ptr[texel]; | |
147 | } | |
148 | ||
149 | u32 fetch_texel_8bpp(psx_gpu_struct *psx_gpu, u32 u, u32 v) | |
150 | { | |
151 | u8 *texture_ptr_8bpp = psx_gpu->texture_page_ptr; | |
152 | u32 texel = texture_ptr_8bpp[(v * 2048) + u]; | |
153 | ||
154 | texels_8bpp++; | |
155 | ||
156 | return psx_gpu->clut_ptr[texel]; | |
157 | } | |
158 | ||
159 | u32 fetch_texel_16bpp(psx_gpu_struct *psx_gpu, u32 u, u32 v) | |
160 | { | |
161 | u16 *texture_ptr_16bpp = psx_gpu->texture_page_ptr; | |
162 | ||
163 | texels_16bpp++; | |
164 | ||
165 | return texture_ptr_16bpp[(v * 1024) + u]; | |
166 | } | |
167 | ||
168 | u32 fetch_texel(psx_gpu_struct *psx_gpu, u32 u, u32 v) | |
169 | { | |
170 | u &= psx_gpu->texture_mask_width; | |
171 | v &= psx_gpu->texture_mask_height; | |
172 | ||
173 | switch(psx_gpu->texture_mode) | |
174 | { | |
175 | case TEXTURE_MODE_4BPP: | |
176 | return fetch_texel_4bpp(psx_gpu, u, v); | |
177 | ||
178 | case TEXTURE_MODE_8BPP: | |
179 | return fetch_texel_8bpp(psx_gpu, u, v); | |
180 | ||
181 | case TEXTURE_MODE_16BPP: | |
182 | return fetch_texel_16bpp(psx_gpu, u, v); | |
183 | } | |
184 | ||
185 | return 0; | |
186 | } | |
187 | ||
188 | void draw_pixel(psx_gpu_struct *psx_gpu, s32 r, s32 g, s32 b, u32 texel, | |
189 | u32 x, u32 y, u32 flags) | |
190 | { | |
191 | u32 pixel; | |
192 | ||
193 | if(r > 31) | |
194 | r = 31; | |
195 | ||
196 | if(g > 31) | |
197 | g = 31; | |
198 | ||
199 | if(b > 31) | |
200 | b = 31; | |
201 | ||
202 | if(flags & RENDER_FLAGS_BLEND) | |
203 | { | |
204 | if(((flags & RENDER_FLAGS_TEXTURE_MAP) == 0) || (texel & 0x8000)) | |
205 | { | |
206 | s32 fb_pixel = psx_gpu->vram[(y * 1024) + x]; | |
207 | s32 fb_r = fb_pixel & 0x1F; | |
208 | s32 fb_g = (fb_pixel >> 5) & 0x1F; | |
209 | s32 fb_b = (fb_pixel >> 10) & 0x1F; | |
210 | ||
211 | blend_pixels++; | |
212 | ||
213 | switch(psx_gpu->blend_mode) | |
214 | { | |
215 | case BLEND_MODE_AVERAGE: | |
216 | r = (r + fb_r) / 2; | |
217 | g = (g + fb_g) / 2; | |
218 | b = (b + fb_b) / 2; | |
219 | break; | |
220 | ||
221 | case BLEND_MODE_ADD: | |
222 | r += fb_r; | |
223 | g += fb_g; | |
224 | b += fb_b; | |
225 | ||
226 | if(r > 31) | |
227 | r = 31; | |
228 | ||
229 | if(g > 31) | |
230 | g = 31; | |
231 | ||
232 | if(b > 31) | |
233 | b = 31; | |
234 | ||
235 | break; | |
236 | ||
237 | case BLEND_MODE_SUBTRACT: | |
238 | r = fb_r - r; | |
239 | g = fb_g - g; | |
240 | b = fb_b - b; | |
241 | ||
242 | if(r < 0) | |
243 | r = 0; | |
244 | ||
245 | if(g < 0) | |
246 | g = 0; | |
247 | ||
248 | if(b < 0) | |
249 | b = 0; | |
250 | ||
251 | break; | |
252 | ||
253 | case BLEND_MODE_ADD_FOURTH: | |
254 | r = fb_r + (r / 4); | |
255 | g = fb_g + (g / 4); | |
256 | b = fb_b + (b / 4); | |
257 | ||
258 | if(r > 31) | |
259 | r = 31; | |
260 | ||
261 | if(g > 31) | |
262 | g = 31; | |
263 | ||
264 | if(b > 31) | |
265 | b = 31; | |
266 | ||
267 | break; | |
268 | } | |
269 | } | |
270 | } | |
271 | ||
272 | pixel = r | (g << 5) | (b << 10); | |
273 | ||
274 | if(psx_gpu->mask_apply || (texel & 0x8000)) | |
275 | pixel |= 0x8000; | |
276 | ||
277 | psx_gpu->vram[(y * 1024) + x] = pixel; | |
278 | } | |
279 | ||
// 4x4 ordered dither offsets, indexed as dither_table[y % 4][x % 4]. The
// offset is added to each 8-bit color channel before it is reduced to 5 bits.
s32 dither_table[4][4] =
{
  { -4, 0, -3, 1 },
  { 2, -2, 3, -1 },
  { -3, 1, -4, 0 },
  { 3, -1, 2, -2 },
};
287 | ||
// Rasterizes one horizontal span of a triangle on scanline y.
//
// The span covers the integer x range [left_x, right_x), taken from the
// span's edge positions above EDGE_STEP_BITS. Spans entirely outside the
// viewport are rejected; the rest are clipped horizontally. Interpolants
// start at the span's current values (which correspond to span->base_x), are
// offset to the first drawn pixel, and advance by step_dx per pixel.
//
// flags selects per-pixel behavior: RENDER_FLAGS_SHADE (interpolated color
// instead of the primitive color), RENDER_FLAGS_TEXTURE_MAP,
// RENDER_FLAGS_MODULATE_TEXELS, and - through draw_pixel - blending.
void render_span(psx_gpu_struct *psx_gpu, _span_struct *span, s32 y,
 u32 flags)
{
  s32 left_x = span->left_x >> EDGE_STEP_BITS;
  s32 right_x = span->right_x >> EDGE_STEP_BITS;
  s32 current_x = left_x;
  s32 delta_x;

  fixed_type current_u = span->u.current_value;
  fixed_type current_v = span->v.current_value;
  fixed_type current_r = span->r.current_value;
  fixed_type current_g = span->g.current_value;
  fixed_type current_b = span->b.current_value;

  // Reject spans that lie completely outside the viewport.
  if(y < psx_gpu->viewport_start_y)
    return;

  if(y > psx_gpu->viewport_end_y)
    return;

  if(right_x < psx_gpu->viewport_start_x)
    return;

  if(current_x > psx_gpu->viewport_end_x)
    return;

  spans++;

  // Clip to the viewport; right_x is exclusive, hence viewport_end_x + 1.
  if(current_x < psx_gpu->viewport_start_x)
    current_x = psx_gpu->viewport_start_x;

  if(right_x > psx_gpu->viewport_end_x + 1)
    right_x = psx_gpu->viewport_end_x + 1;

  // Move the interpolants from base_x to the first pixel actually drawn.
  delta_x = current_x - span->base_x;

  current_u += delta_x * span->u.step_dx;
  current_v += delta_x * span->v.step_dx;
  current_r += delta_x * span->r.step_dx;
  current_g += delta_x * span->g.step_dx;
  current_b += delta_x * span->b.step_dx;

  // Statistics only.
  span_pixels += right_x - current_x;
  span_pixel_blocks += ((right_x / 8) - (current_x / 8)) + 1;

  while(current_x < right_x)
  {
    s32 color_r, color_g, color_b;
    u32 texel = 0;

    // With mask evaluation on, destination pixels with bit 15 set are
    // protected from being overwritten.
    if(psx_gpu->mask_evaluate &&
     (psx_gpu->vram[(y * 1024) + current_x] & 0x8000))
    {
      goto skip_pixel;
    }

    // Base color as 8-bit channels: interpolated or flat primitive color.
    if(flags & RENDER_FLAGS_SHADE)
    {
      color_r = fixed_to_int(current_r);
      color_g = fixed_to_int(current_g);
      color_b = fixed_to_int(current_b);
    }
    else
    {
      color_r = psx_gpu->primitive_color & 0xFF;
      color_g = (psx_gpu->primitive_color >> 8) & 0xFF;
      color_b = (psx_gpu->primitive_color >> 16) & 0xFF;
    }

    if(flags & RENDER_FLAGS_TEXTURE_MAP)
    {
      u32 texel_r, texel_g, texel_b;
      u32 u = fixed_to_int(current_u);
      u32 v = fixed_to_int(current_v);

      texel = fetch_texel(psx_gpu, u, v);

      // A texel value of 0 is fully transparent.
      if(texel == 0)
      {
        transparent_pixels++;
        goto skip_pixel;
      }

      texel_r = texel & 0x1F;
      texel_g = (texel >> 5) & 0x1F;
      texel_b = (texel >> 10) & 0x1F;

      // NOTE(review): the color is multiplied by the texel when
      // RENDER_FLAGS_MODULATE_TEXELS is CLEAR, and the raw texel is used when
      // it is set - the flag name reads inverted; confirm against its
      // definition.
      if((flags & RENDER_FLAGS_MODULATE_TEXELS) == 0)
      {
        color_r *= texel_r;
        color_g *= texel_g;
        color_b *= texel_b;
      }
      else
      {
        // << 7 here, followed by >> 4 and >> 3 below, leaves the texel's
        // 5-bit channel unchanged.
        color_r = texel_r << 7;
        color_g = texel_g << 7;
        color_b = texel_b << 7;
      }

      color_r >>= 4;
      color_g >>= 4;
      color_b >>= 4;
    }
    else
    {
      untextured_pixels++;
    }

    // Dither shaded pixels and textured pixels whose color was multiplied by
    // the texel; clamp only the lower bound (draw_pixel saturates the upper).
    if(psx_gpu->dither_mode && ((flags & RENDER_FLAGS_SHADE) ||
     ((flags & RENDER_FLAGS_TEXTURE_MAP) &&
     ((flags & RENDER_FLAGS_MODULATE_TEXELS) == 0))))
    {
      s32 dither_offset = dither_table[y % 4][current_x % 4];
      color_r += dither_offset;
      color_g += dither_offset;
      color_b += dither_offset;

      if(color_r < 0)
        color_r = 0;

      if(color_g < 0)
        color_g = 0;

      if(color_b < 0)
        color_b = 0;
    }

    // Reduce 8-bit channels to 5 bits.
    color_r >>= 3;
    color_g >>= 3;
    color_b >>= 3;

    draw_pixel(psx_gpu, color_r, color_g, color_b, texel, current_x, y, flags);

    // Skipped pixels still advance every interpolant.
    skip_pixel:

    current_u += span->u.step_dx;
    current_v += span->v.step_dx;
    current_r += span->r.step_dx;
    current_g += span->g.step_dx;
    current_b += span->b.step_dx;

    current_x++;
  }
}
433 | ||
434 | void increment_span(_span_struct *span) | |
435 | { | |
436 | span->left_x += span->left_dx_dy; | |
437 | span->right_x += span->right_dx_dy; | |
438 | ||
439 | span->u.current_value += span->u.step_dy; | |
440 | span->v.current_value += span->v.step_dy; | |
441 | span->r.current_value += span->r.step_dy; | |
442 | span->g.current_value += span->g.step_dy; | |
443 | span->b.current_value += span->b.step_dy; | |
444 | } | |
445 | ||
446 | void decrement_span(_span_struct *span) | |
447 | { | |
448 | span->left_x += span->left_dx_dy; | |
449 | span->right_x += span->right_dx_dy; | |
450 | ||
451 | span->u.current_value -= span->u.step_dy; | |
452 | span->v.current_value -= span->v.step_dy; | |
453 | span->r.current_value -= span->r.step_dy; | |
454 | span->g.current_value -= span->g.step_dy; | |
455 | span->b.current_value -= span->b.step_dy; | |
456 | } | |
457 | ||
458 | ||
// Stores in span.<interpolant>.gradient_area_x the doubled signed area of
// the triangle formed by substituting the interpolant for x in vertices a, b
// and c. Expects a local _span_struct named span and vertex pointers a, b, c
// in scope.
#define compute_gradient_area_x(interpolant) \
{ \
  span.interpolant.gradient_area_x = \
   triangle_signed_area_x2(a->interpolant, a->y, b->interpolant, b->y, \
   c->interpolant, c->y); \
} \

// Same as compute_gradient_area_x, but substitutes the interpolant for y and
// stores the result in gradient_area_y.
#define compute_gradient_area_y(interpolant) \
{ \
  span.interpolant.gradient_area_y = \
   triangle_signed_area_x2(a->x, a->interpolant, b->x, b->interpolant, \
   c->x, c->interpolant); \
} \

// Computes both gradient terms for all five interpolants (u, v, r, g, b).
#define compute_all_gradient_areas() \
  compute_gradient_area_x(u); \
  compute_gradient_area_x(v); \
  compute_gradient_area_x(r); \
  compute_gradient_area_x(g); \
  compute_gradient_area_x(b); \
  compute_gradient_area_y(u); \
  compute_gradient_area_y(v); \
  compute_gradient_area_y(r); \
  compute_gradient_area_y(g); \
  compute_gradient_area_y(b) \
484 | ||
// Initializes one interpolant of *span: step_dx and step_dy are the gradient
// terms multiplied by the triangle-area reciprocal, and current_value is the
// base vertex's attribute converted to fixed point (plus one half). Expects
// span, reciprocal and shift in scope; multi-statement, so only safe where a
// statement sequence is allowed.
#define set_interpolant_base(interpolant, base_vertex) \
  span->interpolant.step_dx = \
   fixed_reciprocal_multiply(span->interpolant.gradient_area_x, reciprocal, \
   span->triangle_winding, shift); \
  span->interpolant.step_dy = \
   fixed_reciprocal_multiply(span->interpolant.gradient_area_y, reciprocal, \
   span->triangle_winding, shift); \
  span->interpolant.current_value = fixed_center(base_vertex->interpolant) \

// Initializes all five interpolants from base_vertex and records the span's
// base x. The reciprocal's shift is reduced by FIXED_BITS so the resulting
// steps come out in fixed point. Must run after the left edge has been
// computed, because base_x is taken from span->left_x.
#define set_interpolant_bases(base_vertex) \
{ \
  u32 shift; \
  u32 reciprocal = fixed_reciprocal(span->triangle_area, &shift); \
  shift -= FIXED_BITS; \
  set_interpolant_base(u, base_vertex); \
  set_interpolant_base(v, base_vertex); \
  set_interpolant_base(r, base_vertex); \
  set_interpolant_base(g, base_vertex); \
  set_interpolant_base(b, base_vertex); \
  span->base_x = span->left_x >> EDGE_STEP_BITS; \
} \
506 | ||
// Computes the starting position (span-><edge>_x) and per-scanline step
// (span-><edge>_dx_dy) of one span edge running from vertex start to vertex
// end over height scanlines. Both results carry EDGE_STEP_BITS fractional
// bits.
//
// The division by height is replaced by a multiply with a reciprocal of the
// normalized height, scaled by 2^50; the final shift compensates for the
// normalization and for the difference between 50 and EDGE_STEP_BITS. The
// starting position is biased by (height - 1) / height.
//
// height must be non-zero (__builtin_clz(0) is undefined) and small enough
// that the final shift stays non-negative. edge is pasted into the field
// name, so it must be left or right.
#define compute_edge_delta(edge, start, end, height) \
{ \
  s32 x_start = start->x; \
  s32 x_end = end->x; \
  s32 width = x_end - x_start; \
  \
  s32 shift = __builtin_clz(height); \
  u32 height_normalized = height << shift; \
  u32 height_reciprocal = ((1ULL << 50) + (height_normalized - 1)) / \
   height_normalized; \
  \
  shift -= (50 - EDGE_STEP_BITS); \
  \
  span->edge##_x = \
   ((((s64)x_start * height) + (height - 1)) * height_reciprocal) << shift; \
  span->edge##_dx_dy = ((s64)width * height_reciprocal) << shift; \
} \
524 | ||
525 | ||
// Renders height scanlines moving upward (decreasing y). The span is stepped
// BEFORE each scanline is drawn. Expects psx_gpu, span, current_y and flags
// in scope and modifies both current_y and the height variable passed in.
// The loop body runs at least once, so height must be non-zero on entry.
#define render_spans_up(height) \
  do \
  { \
    decrement_span(span); \
    render_span(psx_gpu, span, current_y, flags); \
    current_y--; \
    height--; \
  } while(height) \

// Renders height scanlines moving downward (increasing y). The span is
// stepped AFTER each scanline is drawn. Same scope requirements and side
// effects as render_spans_up; height must be non-zero on entry.
#define render_spans_down(height) \
  do \
  { \
    render_span(psx_gpu, span, current_y, flags); \
    increment_span(span); \
    current_y++; \
    height--; \
  } while(height) \
543 | ||
// Function body for a triangle rendered bottom-to-top in two parts. The
// major edge runs from bottom to top; the minor side runs bottom to middle
// and then middle to top. minor and major name the span edges (left or
// right). Interpolants are based at the bottom vertex. Expects bottom,
// middle and top vertex pointers plus psx_gpu, span and flags in scope.
#define render_spans_up_up(minor, major) \
  s32 current_y = bottom->y - 1; \
  s32 height_minor_a = bottom->y - middle->y; \
  s32 height_minor_b = middle->y - top->y; \
  s32 height_major = height_minor_a + height_minor_b; \
  \
  compute_edge_delta(major, bottom, top, height_major); \
  compute_edge_delta(minor, bottom, middle, height_minor_a); \
  set_interpolant_bases(bottom); \
  \
  render_spans_up(height_minor_a); \
  \
  compute_edge_delta(minor, middle, top, height_minor_b); \
  render_spans_up(height_minor_b) \
558 | ||
// Renders a triangle upward from bottom through middle to top, with the
// two-part (minor) side as the left edge and the bottom-to-top (major) edge
// as the right edge.
void render_spans_up_left(psx_gpu_struct *psx_gpu, _span_struct *span,
 vertex_struct *bottom, vertex_struct *middle, vertex_struct *top, u32 flags)
{
  render_spans_up_up(left, right);
}
564 | ||
// Renders a triangle upward from bottom through middle to top, with the
// two-part (minor) side as the right edge and the bottom-to-top (major) edge
// as the left edge.
void render_spans_up_right(psx_gpu_struct *psx_gpu, _span_struct *span,
 vertex_struct *bottom, vertex_struct *middle, vertex_struct *top, u32 flags)
{
  render_spans_up_up(right, left);
}
570 | ||
// Function body for a triangle rendered top-to-bottom in two parts. The
// major edge runs from top to bottom; the minor side runs top to middle and
// then middle to bottom. minor and major name the span edges (left or
// right). Interpolants are based at the top vertex. Expects top, middle and
// bottom vertex pointers plus psx_gpu, span and flags in scope.
#define render_spans_down_down(minor, major) \
  s32 current_y = top->y; \
  s32 height_minor_a = middle->y - top->y; \
  s32 height_minor_b = bottom->y - middle->y; \
  s32 height_major = height_minor_a + height_minor_b; \
  \
  compute_edge_delta(minor, top, middle, height_minor_a); \
  compute_edge_delta(major, top, bottom, height_major); \
  set_interpolant_bases(top); \
  \
  render_spans_down(height_minor_a); \
  \
  compute_edge_delta(minor, middle, bottom, height_minor_b); \
  render_spans_down(height_minor_b) \
585 | ||
// Renders a triangle downward from top through middle to bottom, with the
// two-part (minor) side as the left edge and the top-to-bottom (major) edge
// as the right edge.
void render_spans_down_left(psx_gpu_struct *psx_gpu, _span_struct *span,
 vertex_struct *top, vertex_struct *middle, vertex_struct *bottom, u32 flags)
{
  render_spans_down_down(left, right);
}
591 | ||
// Renders a triangle downward from top through middle to bottom, with the
// two-part (minor) side as the right edge and the top-to-bottom (major) edge
// as the left edge.
void render_spans_down_right(psx_gpu_struct *psx_gpu, _span_struct *span,
 vertex_struct *top, vertex_struct *middle, vertex_struct *bottom, u32 flags)
{
  render_spans_down_down(right, left);
}
597 | ||
// Function body for a triangle with one horizontal edge, rendered upward in
// a single pass. The left edge runs bottom_left -> top_left and the right
// edge bottom_right -> top_right; both share the same height, taken from the
// left pair. Interpolants are based at bottom_left. Expects psx_gpu, span
// and flags in scope.
#define render_spans_up_flat(bottom_left, bottom_right, top_left, top_right) \
  s32 current_y = bottom_left->y - 1; \
  s32 height = bottom_left->y - top_left->y; \
  \
  compute_edge_delta(left, bottom_left, top_left, height); \
  compute_edge_delta(right, bottom_right, top_right, height); \
  set_interpolant_bases(bottom_left); \
  render_spans_up(height) \
606 | ||
// Renders upward a triangle whose bottom edge is horizontal: both edges
// start at the two bottom vertices and meet at the single top vertex.
void render_spans_up_a(psx_gpu_struct *psx_gpu, _span_struct *span,
 vertex_struct *bottom_left, vertex_struct *bottom_right, vertex_struct *top,
 u32 flags)
{
  render_spans_up_flat(bottom_left, bottom_right, top, top);
}
613 | ||
// Renders upward a triangle whose top edge is horizontal: both edges start
// at the single bottom vertex and spread to the two top vertices.
void render_spans_up_b(psx_gpu_struct *psx_gpu, _span_struct *span,
 vertex_struct *bottom, vertex_struct *top_left, vertex_struct *top_right,
 u32 flags)
{
  render_spans_up_flat(bottom, bottom, top_left, top_right);
}
620 | ||
// Function body for a triangle with one horizontal edge, rendered downward
// in a single pass. The left edge runs top_left -> bottom_left and the right
// edge top_right -> bottom_right; both share the same height, taken from the
// left pair. Interpolants are based at top_left. Expects psx_gpu, span and
// flags in scope.
#define render_spans_down_flat(top_left, top_right, bottom_left, bottom_right) \
  s32 current_y = top_left->y; \
  s32 height = bottom_left->y - top_left->y; \
  \
  compute_edge_delta(left, top_left, bottom_left, height); \
  compute_edge_delta(right, top_right, bottom_right, height); \
  set_interpolant_bases(top_left); \
  render_spans_down(height) \
629 | ||
// Renders downward a triangle whose top edge is horizontal: both edges start
// at the two top vertices and meet at the single bottom vertex.
void render_spans_down_a(psx_gpu_struct *psx_gpu, _span_struct *span,
 vertex_struct *top_left, vertex_struct *top_right, vertex_struct *bottom,
 u32 flags)
{
  render_spans_down_flat(top_left, top_right, bottom, bottom);
}
636 | ||
// Renders downward a triangle whose bottom edge is horizontal: both edges
// start at the single top vertex and spread to the two bottom vertices.
void render_spans_down_b(psx_gpu_struct *psx_gpu, _span_struct *span,
 vertex_struct *top, vertex_struct *bottom_left, vertex_struct *bottom_right,
 u32 flags)
{
  render_spans_down_flat(top, top, bottom_left, bottom_right);
}
643 | ||
// Renders a triangle starting at the middle vertex's scanline: first upward
// to the top vertex, then downward to the bottom vertex.
//
// The left edge is middle->top for the upper part and middle->bottom for the
// lower part. The right edge is the long bottom->top edge; it is advanced to
// the middle scanline before rendering upward, and traversed in reverse
// (negated step) when rendering downward. Interpolants are based at the
// middle vertex for both parts.
void render_spans_up_down(psx_gpu_struct *psx_gpu, _span_struct *span,
 vertex_struct *middle, vertex_struct *top, vertex_struct *bottom, u32 flags)
{
  s32 middle_y = middle->y;
  s32 current_y = middle_y - 1;
  s32 height_minor_a = middle->y - top->y;
  s32 height_minor_b = bottom->y - middle->y;
  s32 height_major = height_minor_a + height_minor_b;

  // Right edge position at the middle scanline. NOTE(review): declared u64
  // while span->right_x is s64; the value round-trips through the
  // assignments below.
  u64 right_x_mid;

  compute_edge_delta(left, middle, top, height_minor_a);
  compute_edge_delta(right, bottom, top, height_major);
  set_interpolant_bases(middle);

  // The right edge was set up starting at the bottom vertex; step it
  // height_minor_b scanlines to reach the middle vertex's scanline.
  right_x_mid = span->right_x + (span->right_dx_dy * height_minor_b);
  span->right_x = right_x_mid;

  render_spans_up(height_minor_a);

  // Lower part: new left edge, interpolants rebased at the middle vertex
  // (render_spans_up modified them and base_x depends on the new left edge).
  compute_edge_delta(left, middle, bottom, height_minor_b);
  set_interpolant_bases(middle);

  // Walk the same right edge in the opposite direction, from the middle.
  span->right_dx_dy *= -1;
  span->right_x = right_x_mid;
  current_y = middle_y;

  render_spans_down(height_minor_b);
}
673 | ||
// Swaps two vertex pointers and toggles the winding flag, since exchanging
// two vertices reverses the triangle's orientation. Expects a variable named
// triangle_winding in scope. Expands to a braced block, so
// `if(cond) vertex_swap(a, b);` is safe only without a following else.
#define vertex_swap(_a, _b) \
{ \
  vertex_struct *temp_vertex = _a; \
  _a = _b; \
  _b = temp_vertex; \
  triangle_winding ^= 1; \
} \
681 | ||
682 | ||
// Direction codes for the sign of a y delta between two vertices. They match
// what triangle_set_direction produces: 1 for a negative delta (up), 0 for a
// positive delta (down), 2 for zero (flat).
#define triangle_y_direction_up 1
#define triangle_y_direction_flat 2
#define triangle_y_direction_down 0

// Winding codes, matching the triangle_winding flag.
#define triangle_winding_positive 0
#define triangle_winding_negative 1

// Declares direction_variable and sets it to the direction code for value:
// the sign bit of value, or 2 when value is zero. Expands to a declaration
// followed by an if statement; the caller supplies the final semicolon.
#define triangle_set_direction(direction_variable, value) \
  u32 direction_variable = (u32)(value) >> 31; \
  if(value == 0) \
    direction_variable = 2 \

// Builds a case label from three direction names and a winding name, packed
// the same way as the switch expression in render_triangle: two bits per
// direction (a in bits 0-1, b in bits 2-3, c in bits 4-5) and the winding in
// bit 6.
#define triangle_case(direction_a, direction_b, direction_c, winding) \
  case (triangle_y_direction_##direction_a | \
   (triangle_y_direction_##direction_b << 2) | \
   (triangle_y_direction_##direction_c << 4) | \
   (triangle_winding_##winding << 6)) \
700 | ||
701 | ||
// Renders the triangle given by vertexes[0..2].
//
// Zero-area triangles are dropped, as are triangles spanning 512 or more
// scanlines or 1024 or more pixels horizontally. The vertices are ordered by
// x (a leftmost, c rightmost), the signs of the y deltas between them are
// classified as up/flat/down, and the combination of those directions with
// the winding selects one of the render_spans_* routines. Combinations
// without a case label draw nothing.
void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
 u32 flags)
{
  s32 triangle_area;
  u32 triangle_winding = 0;
  _span_struct span;

  vertex_struct *a = &(vertexes[0]);
  vertex_struct *b = &(vertexes[1]);
  vertex_struct *c = &(vertexes[2]);

  triangle_area = triangle_signed_area_x2(a->x, a->y, b->x, b->y, c->x, c->y);

  triangles++;

  if(triangle_area == 0)
    return;

  // Order by y so the vertical extent can be checked. Each vertex_swap also
  // toggles triangle_winding.
  if(b->y < a->y)
    vertex_swap(a, b);

  if(c->y < b->y)
  {
    vertex_swap(b, c);

    if(b->y < a->y)
      vertex_swap(a, b);
  }

  if((c->y - a->y) >= 512)
    return;

  // Use the area's magnitude from here on (span.triangle_area is unsigned).
  // NOTE(review): the winding is toggled explicitly and then again inside
  // vertex_swap, so this block leaves triangle_winding unchanged while still
  // swapping a and c - confirm this is intended.
  if(triangle_area < 0)
  {
    triangle_area = -triangle_area;
    triangle_winding ^= 1;
    vertex_swap(a, c);
  }

  // Final ordering is by x; the y deltas below are taken in this order.
  if(b->x < a->x)
    vertex_swap(a, b);

  if(c->x < b->x)
  {
    vertex_swap(b, c);

    if(b->x < a->x)
      vertex_swap(a, b);
  }

  if((c->x - a->x) >= 1024)
    return;

  s32 y_delta_a = b->y - a->y;
  s32 y_delta_b = c->y - b->y;
  s32 y_delta_c = c->y - a->y;

  triangle_set_direction(y_direction_a, y_delta_a);
  triangle_set_direction(y_direction_b, y_delta_b);
  triangle_set_direction(y_direction_c, y_delta_c);

  compute_all_gradient_areas();
  span.triangle_area = triangle_area;
  span.triangle_winding = triangle_winding;

  // Dispatch on the packed (direction a, direction b, direction c, winding)
  // key; see triangle_case for the bit layout.
  switch(y_direction_a | (y_direction_b << 2) | (y_direction_c << 4) |
   (triangle_winding << 6))
  {
    triangle_case(up, up, up, negative):
    triangle_case(up, up, flat, negative):
    triangle_case(up, up, down, negative):
      render_spans_up_right(psx_gpu, &span, a, b, c, flags);
      break;

    triangle_case(flat, up, up, negative):
    triangle_case(flat, up, flat, negative):
    triangle_case(flat, up, down, negative):
      render_spans_up_a(psx_gpu, &span, a, b, c, flags);
      break;

    triangle_case(down, up, up, negative):
      render_spans_up_down(psx_gpu, &span, a, c, b, flags);
      break;

    triangle_case(down, up, flat, negative):
      render_spans_down_a(psx_gpu, &span, a, c, b, flags);
      break;

    triangle_case(down, up, down, negative):
      render_spans_down_right(psx_gpu, &span, a, c, b, flags);
      break;

    triangle_case(down, flat, up, negative):
    triangle_case(down, flat, flat, negative):
    triangle_case(down, flat, down, negative):
      render_spans_down_b(psx_gpu, &span, a, b, c, flags);
      break;

    triangle_case(down, down, up, negative):
    triangle_case(down, down, flat, negative):
    triangle_case(down, down, down, negative):
      render_spans_down_left(psx_gpu, &span, a, b, c, flags);
      break;

    triangle_case(up, up, up, positive):
    triangle_case(up, up, flat, positive):
    triangle_case(up, up, down, positive):
      render_spans_up_left(psx_gpu, &span, a, b, c, flags);
      break;

    triangle_case(up, flat, up, positive):
    triangle_case(up, flat, flat, positive):
    triangle_case(up, flat, down, positive):
      render_spans_up_b(psx_gpu, &span, a, b, c, flags);
      break;

    triangle_case(up, down, up, positive):
      render_spans_up_right(psx_gpu, &span, a, c, b, flags);
      break;

    triangle_case(up, down, flat, positive):
      render_spans_up_a(psx_gpu, &span, a, c, b, flags);
      break;

    triangle_case(up, down, down, positive):
      render_spans_up_down(psx_gpu, &span, a, b, c, flags);
      break;

    triangle_case(flat, down, up, positive):
    triangle_case(flat, down, flat, positive):
    triangle_case(flat, down, down, positive):
      render_spans_down_a(psx_gpu, &span, a, b, c, flags);
      break;

    triangle_case(down, down, up, positive):
    triangle_case(down, down, flat, positive):
    triangle_case(down, down, down, positive):
      render_spans_down_right(psx_gpu, &span, a, b, c, flags);
      break;
  }

}
844 | ||
845 | ||
846 | void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, | |
847 | s32 width, s32 height, u32 flags) | |
848 | { | |
849 | // TODO: Flip/mirror | |
850 | s32 current_x, current_y; | |
851 | u32 current_u, current_v; | |
852 | u32 primitive_color = psx_gpu->primitive_color; | |
853 | u32 sprite_r, sprite_g, sprite_b; | |
854 | s32 color_r = 0; | |
855 | s32 color_g = 0; | |
856 | s32 color_b = 0; | |
857 | u32 texel = 0; | |
858 | ||
859 | sprite_r = primitive_color & 0xFF; | |
860 | sprite_g = (primitive_color >> 8) & 0xFF; | |
861 | sprite_b = (primitive_color >> 16) & 0xFF; | |
862 | ||
863 | static u32 sprites = 0; | |
864 | ||
865 | sprites++; | |
866 | ||
867 | for(current_y = y, current_v = v; | |
868 | current_y < y + height; current_y++, current_v++) | |
869 | { | |
870 | for(current_x = x, current_u = u; | |
871 | current_x < x + width; current_x++, current_u++) | |
872 | { | |
873 | if((current_x >= psx_gpu->viewport_start_x) && | |
874 | (current_y >= psx_gpu->viewport_start_y) && | |
875 | (current_x <= psx_gpu->viewport_end_x) && | |
876 | (current_y <= psx_gpu->viewport_end_y)) | |
877 | { | |
878 | if(psx_gpu->mask_evaluate && | |
879 | (psx_gpu->vram[(y * 1024) + current_x] & 0x8000)) | |
880 | { | |
881 | continue; | |
882 | } | |
883 | ||
884 | if(flags & RENDER_FLAGS_TEXTURE_MAP) | |
885 | { | |
886 | texel = fetch_texel(psx_gpu, current_u, current_v); | |
887 | if(texel == 0) | |
888 | continue; | |
889 | ||
890 | color_r = texel & 0x1F; | |
891 | color_g = (texel >> 5) & 0x1F; | |
892 | color_b = (texel >> 10) & 0x1F; | |
893 | ||
894 | if((flags & RENDER_FLAGS_MODULATE_TEXELS) == 0) | |
895 | { | |
896 | color_r *= sprite_r; | |
897 | color_g *= sprite_g; | |
898 | color_b *= sprite_b; | |
899 | ||
900 | color_r >>= 7; | |
901 | color_g >>= 7; | |
902 | color_b >>= 7; | |
903 | } | |
904 | } | |
905 | else | |
906 | { | |
907 | color_r = sprite_r >> 3; | |
908 | color_g = sprite_g >> 3; | |
909 | color_b = sprite_b >> 3; | |
910 | } | |
911 | ||
912 | draw_pixel(psx_gpu, color_r, color_g, color_b, texel, current_x, | |
913 | current_y, flags); | |
914 | } | |
915 | } | |
916 | } | |
917 | } | |
918 | ||
919 | ||
// Draws one pixel of a line at (_x, _y) if it lies inside the viewport.
//
// Expects in scope: psx_gpu, flags, primitive_color, color_r/g/b,
// current_r/g/b and gradient_r/g/b. With RENDER_FLAGS_SHADE the color comes
// from the fixed-point current_* values, which are then advanced by the
// gradients; otherwise the primitive color is used. With dithering enabled
// the dither offset is added and the channels are clamped at zero. The 8-bit
// channels are reduced to 5 bits before draw_pixel is called with a texel
// of 0.
//
// NOTE(review): the shading values are advanced only for pixels inside the
// viewport, so clipped pixels do not advance the gradient - confirm this is
// intended. _x and _y are evaluated several times and are not parenthesized.
#define draw_pixel_line(_x, _y) \
  if((_x >= psx_gpu->viewport_start_x) && (_y >= psx_gpu->viewport_start_y) && \
   (_x <= psx_gpu->viewport_end_x) && (_y <= psx_gpu->viewport_end_y)) \
  { \
    if(flags & RENDER_FLAGS_SHADE) \
    { \
      color_r = fixed_to_int(current_r); \
      color_g = fixed_to_int(current_g); \
      color_b = fixed_to_int(current_b); \
      \
      current_r += gradient_r; \
      current_g += gradient_g; \
      current_b += gradient_b; \
    } \
    else \
    { \
      color_r = primitive_color & 0xFF; \
      color_g = (primitive_color >> 8) & 0xFF; \
      color_b = (primitive_color >> 16) & 0xFF; \
    } \
    \
    if(psx_gpu->dither_mode) \
    { \
      s32 dither_offset = dither_table[_y % 4][_x % 4]; \
      \
      color_r += dither_offset; \
      color_g += dither_offset; \
      color_b += dither_offset; \
      \
      if(color_r < 0) \
        color_r = 0; \
      \
      if(color_g < 0) \
        color_g = 0; \
      \
      if(color_b < 0) \
        color_b = 0; \
    } \
    color_r >>= 3; \
    color_g >>= 3; \
    color_b >>= 3; \
    \
    span_pixels++; \
    \
    draw_pixel(psx_gpu, color_r, color_g, color_b, 0, _x, _y, flags); \
  } \
966 | ||
// Direction helpers selected by token pasting (update_##direction and
// compare_##direction) in the line-drawing macros: "increment" steps a
// coordinate up and loops while a <= b, "decrement" steps it down and loops
// while a >= b.
#define update_increment(value) \
  value++ \

#define update_decrement(value) \
  value-- \

#define compare_increment(a, b) \
  (a <= b) \

#define compare_decrement(a, b) \
  (a >= b) \
978 | ||
// Sets up color interpolation along a line. minor is x or y and selects
// delta_x or delta_y as the number of steps; the per-step gradients are the
// fixed-point color differences between vertex_b and vertex_a divided by
// that count, and the current colors start at vertex_a's color (plus one
// half). Expects vertex_a, vertex_b, delta_<minor>, gradient_r/g/b and
// current_r/g/b in scope. The selected delta must be non-zero, since it is
// used as a divisor.
#define set_line_gradients(minor) \
{ \
  s32 gradient_divisor = delta_##minor; \
  gradient_r = int_to_fixed(vertex_b->r - vertex_a->r) / gradient_divisor; \
  gradient_g = int_to_fixed(vertex_b->g - vertex_a->g) / gradient_divisor; \
  gradient_b = int_to_fixed(vertex_b->b - vertex_a->b) / gradient_divisor; \
  current_r = fixed_center(vertex_a->r); \
  current_g = fixed_center(vertex_a->g); \
  current_b = fixed_center(vertex_a->b); \
}
989 | ||
// Draws a line by stepping x from x_a to x_b inclusive, one pixel per step.
// An integer error term starts at delta_x, grows by 2 * delta_y per pixel,
// and each time it reaches 2 * delta_x the y coordinate is stepped in the
// given direction (increment or decrement) and the error is reduced by
// 2 * delta_x. Expects the line-drawing locals (x_a, x_b, y_a, delta_x,
// delta_y, error, error_step, error_wrap, current_x, current_y) and
// everything draw_pixel_line needs in scope.
#define draw_line_span_horizontal(direction) \
do \
{ \
  error_step = delta_y * 2; \
  error_wrap = delta_x * 2; \
  error = delta_x; \
  \
  current_y = y_a; \
  set_line_gradients(x); \
  \
  for(current_x = x_a; current_x <= x_b; current_x++) \
  { \
    draw_pixel_line(current_x, current_y); \
    error += error_step; \
    \
    if(error >= error_wrap) \
    { \
      update_##direction(current_y); \
      error -= error_wrap; \
    } \
  } \
} while(0) \
1012 | ||
/*
 * Draws a line that advances one pixel in y per iteration, from y_a through
 * y_b inclusive, stepping x with an integer error accumulator.
 *
 * direction is `increment` or `decrement`; it is pasted onto compare_ and
 * update_ to choose the loop condition and the way current_y moves.
 * current_x only ever moves forward.
 *
 * Note that the wrap test here is a strict `>`, unlike the `>=` used by
 * draw_line_span_horizontal.
 *
 * Uses the caller's locals by name: delta_x, delta_y, x_a, y_a, y_b,
 * current_x, current_y, error, error_step, error_wrap, plus everything
 * set_line_gradients and draw_pixel_line need.
 */
#define draw_line_span_vertical(direction)                                     \
do                                                                             \
{                                                                              \
  set_line_gradients(y);                                                       \
                                                                               \
  /* Accumulator starts at half of the wrap threshold */                       \
  error = delta_y;                                                             \
  error_wrap = delta_y + delta_y;                                              \
  error_step = delta_x + delta_x;                                              \
                                                                               \
  current_x = x_a;                                                             \
                                                                               \
  for(current_y = y_a; compare_##direction(current_y, y_b);                    \
   update_##direction(current_y))                                              \
  {                                                                            \
    draw_pixel_line(current_x, current_y);                                     \
                                                                               \
    error += error_step;                                                       \
    if(error > error_wrap)                                                     \
    {                                                                          \
      error -= error_wrap;                                                     \
      current_x++;                                                             \
    }                                                                          \
  }                                                                            \
} while(0)
1036 | ||
1037 | void render_line(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 flags) | |
1038 | { | |
1039 | u32 primitive_color = psx_gpu->primitive_color; | |
1040 | s32 color_r, color_g, color_b; | |
1041 | ||
1042 | fixed_type gradient_r = 0; | |
1043 | fixed_type gradient_g = 0; | |
1044 | fixed_type gradient_b = 0; | |
1045 | fixed_type current_r = 0; | |
1046 | fixed_type current_g = 0; | |
1047 | fixed_type current_b = 0; | |
1048 | ||
1049 | s32 y_a, y_b; | |
1050 | s32 x_a, x_b; | |
1051 | ||
1052 | s32 delta_x, delta_y; | |
1053 | u32 triangle_winding = 0; | |
1054 | ||
1055 | s32 current_x; | |
1056 | s32 current_y; | |
1057 | ||
1058 | u32 error_step; | |
1059 | u32 error; | |
1060 | u32 error_wrap; | |
1061 | ||
1062 | vertex_struct *vertex_a = &(vertexes[0]); | |
1063 | vertex_struct *vertex_b = &(vertexes[1]); | |
1064 | ||
1065 | if(vertex_a->x >= vertex_b->x) | |
1066 | { | |
1067 | vertex_swap(vertex_a, vertex_b); | |
1068 | } | |
1069 | ||
1070 | x_a = vertex_a->x; | |
1071 | x_b = vertex_b->x; | |
1072 | ||
1073 | y_a = vertex_a->y; | |
1074 | y_b = vertex_b->y; | |
1075 | ||
1076 | delta_x = x_b - x_a; | |
1077 | delta_y = y_b - y_a; | |
1078 | ||
1079 | if(delta_x >= 1024) | |
1080 | return; | |
1081 | ||
1082 | flags &= ~RENDER_FLAGS_TEXTURE_MAP; | |
1083 | ||
1084 | if(delta_y < 0) | |
1085 | { | |
1086 | delta_y *= -1; | |
1087 | ||
1088 | if(delta_y >= 512) | |
1089 | return; | |
1090 | ||
1091 | if(delta_x > delta_y) | |
1092 | draw_line_span_horizontal(decrement); | |
1093 | else | |
1094 | draw_line_span_vertical(decrement); | |
1095 | } | |
1096 | else | |
1097 | { | |
1098 | if(delta_y >= 512) | |
1099 | return; | |
1100 | ||
1101 | if(delta_x > delta_y) | |
1102 | draw_line_span_horizontal(increment); | |
1103 | else | |
1104 | draw_line_span_vertical(increment); | |
1105 | } | |
1106 | } | |
1107 | ||
1108 | ||
1109 | void render_block_fill(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y, | |
1110 | u32 width, u32 height) | |
1111 | { | |
1112 | u32 r = color & 0xFF; | |
1113 | u32 g = (color >> 8) & 0xFF; | |
1114 | u32 b = (color >> 16) & 0xFF; | |
1115 | u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10); | |
1116 | ||
1117 | u16 *vram_ptr = psx_gpu->vram + x + (y * 1024); | |
1118 | u32 draw_x, draw_y; | |
1119 | ||
1120 | for(draw_y = 0; draw_y < height; draw_y++) | |
1121 | { | |
1122 | for(draw_x = 0; draw_x < width; draw_x++) | |
1123 | { | |
1124 | vram_ptr[draw_x] = color_16bpp; | |
1125 | } | |
1126 | ||
1127 | vram_ptr += 1024; | |
1128 | } | |
1129 | } | |
1130 | ||
1131 | void render_block_copy(psx_gpu_struct *psx_gpu, u16 *source, u32 x, u32 y, | |
1132 | u32 width, u32 height, u32 pitch) | |
1133 | { | |
1134 | u16 *vram_ptr = psx_gpu->vram + x + (y * 1024); | |
1135 | u32 draw_x, draw_y; | |
1136 | ||
1137 | for(draw_y = 0; draw_y < height; draw_y++) | |
1138 | { | |
1139 | for(draw_x = 0; draw_x < width; draw_x++) | |
1140 | { | |
1141 | vram_ptr[draw_x] = source[draw_x]; | |
1142 | } | |
1143 | ||
1144 | source += pitch; | |
1145 | vram_ptr += 1024; | |
1146 | } | |
1147 | } | |
1148 | ||
1149 | void render_block_move(psx_gpu_struct *psx_gpu, u32 source_x, u32 source_y, | |
1150 | u32 dest_x, u32 dest_y, u32 width, u32 height) | |
1151 | { | |
1152 | render_block_copy(psx_gpu, psx_gpu->vram + source_x + (source_y * 1024), | |
1153 | dest_x, dest_y, width, height, 1024); | |
1154 | } | |
1155 | ||
1156 | void initialize_psx_gpu(psx_gpu_struct *psx_gpu) | |
1157 | { | |
1158 | psx_gpu->pixel_count_mode = 0; | |
1159 | psx_gpu->pixel_compare_mode = 0; | |
1160 | ||
1161 | psx_gpu->vram_pixel_counts_a = malloc(sizeof(u8) * 1024 * 512); | |
1162 | psx_gpu->vram_pixel_counts_b = malloc(sizeof(u8) * 1024 * 512); | |
1163 | memset(psx_gpu->vram_pixel_counts_a, 0, sizeof(u8) * 1024 * 512); | |
1164 | memset(psx_gpu->vram_pixel_counts_b, 0, sizeof(u8) * 1024 * 512); | |
1165 | psx_gpu->compare_vram = malloc(sizeof(u16) * 1024 * 512); | |
1166 | } |