add NEON GPU rasterizer
[pcsx_rearmed.git] / plugins / gpu_neon / psx_gpu / psx_gpu_standard.c
CommitLineData
75e28f62
E
1/*
2 * Copyright (C) 2011 Gilead Kutnick "Exophase" <exophase@gmail.com>
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of
7 * the License, or (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 */
14
15#include <stdio.h>
16#include <stdlib.h>
17#include <malloc.h>
18#include <math.h>
19
20#include "common.h"
21
22typedef s32 fixed_type;
23
24#define EDGE_STEP_BITS 32
25#define FIXED_BITS 12
26
27#define fixed_center(value) \
28 ((((fixed_type)value) << FIXED_BITS) + (1 << (FIXED_BITS - 1))) \
29
30#define int_to_fixed(value) \
31 (((fixed_type)value) << FIXED_BITS) \
32
33#define fixed_to_int(value) \
34 ((value) >> FIXED_BITS) \
35
36#define fixed_mul(_a, _b) \
37 (((s64)(_a) * (_b)) >> FIXED_BITS) \
38
39#define fixed_to_double(value) \
40 ((value) / (double)(1 << FIXED_BITS)) \
41
42#define double_to_fixed(value) \
43 (fixed_type)(((value) * (double)(1 << FIXED_BITS))) \
44
45typedef struct
46{
47 fixed_type current_value;
48 fixed_type step_dx;
49 fixed_type step_dy;
50 fixed_type gradient_area_x;
51 fixed_type gradient_area_y;
52} interpolant_struct;
53
54typedef struct
55{
56 s32 base_x;
57
58 s64 left_x;
59 s64 left_dx_dy;
60
61 s64 right_x;
62 s64 right_dx_dy;
63
64 u32 triangle_area;
65 u32 triangle_winding;
66
67 interpolant_struct u;
68 interpolant_struct v;
69 interpolant_struct r;
70 interpolant_struct g;
71 interpolant_struct b;
72} _span_struct;
73
74
75u32 span_pixels = 0;
76u32 span_pixel_blocks = 0;
77u32 spans = 0;
78u32 triangles = 0;
79
80u32 texels_4bpp = 0;
81u32 texels_8bpp = 0;
82u32 texels_16bpp = 0;
83u32 untextured_pixels = 0;
84u32 blend_pixels = 0;
85u32 transparent_pixels = 0;
86
87u32 state_changes = 0;
88u32 render_buffer_flushes = 0;
89u32 trivial_rejects = 0;
90
91void flush_render_block_buffer(psx_gpu_struct *psx_gpu)
92{
93
94}
95
96
97u32 fixed_reciprocal(u32 denominator, u32 *_shift)
98{
99 u32 shift = __builtin_clz(denominator);
100 u32 denominator_normalized = denominator << shift;
101
102 // Implement with a DP divide
103 u32 reciprocal =
104 (double)((1ULL << 62) + (denominator_normalized - 1)) /
105 (double)denominator_normalized;
106
107 *_shift = 62 - shift;
108 return reciprocal;
109}
110
111fixed_type fixed_reciprocal_multiply(s32 numerator, u32 reciprocal,
112 u32 reciprocal_sign, u32 shift)
113{
114 u32 numerator_sign = (u32)numerator >> 31;
115 u32 flip_sign = numerator_sign ^ reciprocal_sign;
116 u32 flip_sign_mask = ~(flip_sign - 1);
117 fixed_type value;
118
119 numerator = abs(numerator);
120
121 value = ((u64)numerator * reciprocal) >> shift;
122
123 value ^= flip_sign_mask;
124 value -= flip_sign_mask;
125
126 return value;
127}
128
129s32 triangle_signed_area_x2(s32 x0, s32 y0, s32 x1, s32 y1, s32 x2, s32 y2)
130{
131 return ((x1 - x0) * (y2 - y1)) - ((x2 - x1) * (y1 - y0));
132}
133
134u32 fetch_texel_4bpp(psx_gpu_struct *psx_gpu, u32 u, u32 v)
135{
136 u8 *texture_ptr_8bpp = psx_gpu->texture_page_ptr;
137 u32 texel = texture_ptr_8bpp[(v * 2048) + (u / 2)];
138
139 if(u & 1)
140 texel >>= 4;
141 else
142 texel &= 0xF;
143
144 texels_4bpp++;
145
146 return psx_gpu->clut_ptr[texel];
147}
148
149u32 fetch_texel_8bpp(psx_gpu_struct *psx_gpu, u32 u, u32 v)
150{
151 u8 *texture_ptr_8bpp = psx_gpu->texture_page_ptr;
152 u32 texel = texture_ptr_8bpp[(v * 2048) + u];
153
154 texels_8bpp++;
155
156 return psx_gpu->clut_ptr[texel];
157}
158
159u32 fetch_texel_16bpp(psx_gpu_struct *psx_gpu, u32 u, u32 v)
160{
161 u16 *texture_ptr_16bpp = psx_gpu->texture_page_ptr;
162
163 texels_16bpp++;
164
165 return texture_ptr_16bpp[(v * 1024) + u];
166}
167
168u32 fetch_texel(psx_gpu_struct *psx_gpu, u32 u, u32 v)
169{
170 u &= psx_gpu->texture_mask_width;
171 v &= psx_gpu->texture_mask_height;
172
173 switch(psx_gpu->texture_mode)
174 {
175 case TEXTURE_MODE_4BPP:
176 return fetch_texel_4bpp(psx_gpu, u, v);
177
178 case TEXTURE_MODE_8BPP:
179 return fetch_texel_8bpp(psx_gpu, u, v);
180
181 case TEXTURE_MODE_16BPP:
182 return fetch_texel_16bpp(psx_gpu, u, v);
183 }
184
185 return 0;
186}
187
188void draw_pixel(psx_gpu_struct *psx_gpu, s32 r, s32 g, s32 b, u32 texel,
189 u32 x, u32 y, u32 flags)
190{
191 u32 pixel;
192
193 if(r > 31)
194 r = 31;
195
196 if(g > 31)
197 g = 31;
198
199 if(b > 31)
200 b = 31;
201
202 if(flags & RENDER_FLAGS_BLEND)
203 {
204 if(((flags & RENDER_FLAGS_TEXTURE_MAP) == 0) || (texel & 0x8000))
205 {
206 s32 fb_pixel = psx_gpu->vram[(y * 1024) + x];
207 s32 fb_r = fb_pixel & 0x1F;
208 s32 fb_g = (fb_pixel >> 5) & 0x1F;
209 s32 fb_b = (fb_pixel >> 10) & 0x1F;
210
211 blend_pixels++;
212
213 switch(psx_gpu->blend_mode)
214 {
215 case BLEND_MODE_AVERAGE:
216 r = (r + fb_r) / 2;
217 g = (g + fb_g) / 2;
218 b = (b + fb_b) / 2;
219 break;
220
221 case BLEND_MODE_ADD:
222 r += fb_r;
223 g += fb_g;
224 b += fb_b;
225
226 if(r > 31)
227 r = 31;
228
229 if(g > 31)
230 g = 31;
231
232 if(b > 31)
233 b = 31;
234
235 break;
236
237 case BLEND_MODE_SUBTRACT:
238 r = fb_r - r;
239 g = fb_g - g;
240 b = fb_b - b;
241
242 if(r < 0)
243 r = 0;
244
245 if(g < 0)
246 g = 0;
247
248 if(b < 0)
249 b = 0;
250
251 break;
252
253 case BLEND_MODE_ADD_FOURTH:
254 r = fb_r + (r / 4);
255 g = fb_g + (g / 4);
256 b = fb_b + (b / 4);
257
258 if(r > 31)
259 r = 31;
260
261 if(g > 31)
262 g = 31;
263
264 if(b > 31)
265 b = 31;
266
267 break;
268 }
269 }
270 }
271
272 pixel = r | (g << 5) | (b << 10);
273
274 if(psx_gpu->mask_apply || (texel & 0x8000))
275 pixel |= 0x8000;
276
277 psx_gpu->vram[(y * 1024) + x] = pixel;
278}
279
280s32 dither_table[4][4] =
281{
282 { -4, 0, -3, 1 },
283 { 2, -2, 3, -1 },
284 { -3, 1, -4, 0 },
285 { 3, -1, 2, -2 },
286};
287
288void render_span(psx_gpu_struct *psx_gpu, _span_struct *span, s32 y,
289 u32 flags)
290{
291 s32 left_x = span->left_x >> EDGE_STEP_BITS;
292 s32 right_x = span->right_x >> EDGE_STEP_BITS;
293 s32 current_x = left_x;
294 s32 delta_x;
295
296 fixed_type current_u = span->u.current_value;
297 fixed_type current_v = span->v.current_value;
298 fixed_type current_r = span->r.current_value;
299 fixed_type current_g = span->g.current_value;
300 fixed_type current_b = span->b.current_value;
301
302 if(y < psx_gpu->viewport_start_y)
303 return;
304
305 if(y > psx_gpu->viewport_end_y)
306 return;
307
308 if(right_x < psx_gpu->viewport_start_x)
309 return;
310
311 if(current_x > psx_gpu->viewport_end_x)
312 return;
313
314 spans++;
315
316 if(current_x < psx_gpu->viewport_start_x)
317 current_x = psx_gpu->viewport_start_x;
318
319 if(right_x > psx_gpu->viewport_end_x + 1)
320 right_x = psx_gpu->viewport_end_x + 1;
321
322 delta_x = current_x - span->base_x;
323
324 current_u += delta_x * span->u.step_dx;
325 current_v += delta_x * span->v.step_dx;
326 current_r += delta_x * span->r.step_dx;
327 current_g += delta_x * span->g.step_dx;
328 current_b += delta_x * span->b.step_dx;
329
330 span_pixels += right_x - current_x;
331 span_pixel_blocks += ((right_x / 8) - (current_x / 8)) + 1;
332
333 while(current_x < right_x)
334 {
335 s32 color_r, color_g, color_b;
336 u32 texel = 0;
337
338 if(psx_gpu->mask_evaluate &&
339 (psx_gpu->vram[(y * 1024) + current_x] & 0x8000))
340 {
341 goto skip_pixel;
342 }
343
344 if(flags & RENDER_FLAGS_SHADE)
345 {
346 color_r = fixed_to_int(current_r);
347 color_g = fixed_to_int(current_g);
348 color_b = fixed_to_int(current_b);
349 }
350 else
351 {
352 color_r = psx_gpu->primitive_color & 0xFF;
353 color_g = (psx_gpu->primitive_color >> 8) & 0xFF;
354 color_b = (psx_gpu->primitive_color >> 16) & 0xFF;
355 }
356
357 if(flags & RENDER_FLAGS_TEXTURE_MAP)
358 {
359 u32 texel_r, texel_g, texel_b;
360 u32 u = fixed_to_int(current_u);
361 u32 v = fixed_to_int(current_v);
362
363 texel = fetch_texel(psx_gpu, u, v);
364
365 if(texel == 0)
366 {
367 transparent_pixels++;
368 goto skip_pixel;
369 }
370
371 texel_r = texel & 0x1F;
372 texel_g = (texel >> 5) & 0x1F;
373 texel_b = (texel >> 10) & 0x1F;
374
375 if((flags & RENDER_FLAGS_MODULATE_TEXELS) == 0)
376 {
377 color_r *= texel_r;
378 color_g *= texel_g;
379 color_b *= texel_b;
380 }
381 else
382 {
383 color_r = texel_r << 7;
384 color_g = texel_g << 7;
385 color_b = texel_b << 7;
386 }
387
388 color_r >>= 4;
389 color_g >>= 4;
390 color_b >>= 4;
391 }
392 else
393 {
394 untextured_pixels++;
395 }
396
397 if(psx_gpu->dither_mode && ((flags & RENDER_FLAGS_SHADE) ||
398 ((flags & RENDER_FLAGS_TEXTURE_MAP) &&
399 ((flags & RENDER_FLAGS_MODULATE_TEXELS) == 0))))
400 {
401 s32 dither_offset = dither_table[y % 4][current_x % 4];
402 color_r += dither_offset;
403 color_g += dither_offset;
404 color_b += dither_offset;
405
406 if(color_r < 0)
407 color_r = 0;
408
409 if(color_g < 0)
410 color_g = 0;
411
412 if(color_b < 0)
413 color_b = 0;
414 }
415
416 color_r >>= 3;
417 color_g >>= 3;
418 color_b >>= 3;
419
420 draw_pixel(psx_gpu, color_r, color_g, color_b, texel, current_x, y, flags);
421
422 skip_pixel:
423
424 current_u += span->u.step_dx;
425 current_v += span->v.step_dx;
426 current_r += span->r.step_dx;
427 current_g += span->g.step_dx;
428 current_b += span->b.step_dx;
429
430 current_x++;
431 }
432}
433
434void increment_span(_span_struct *span)
435{
436 span->left_x += span->left_dx_dy;
437 span->right_x += span->right_dx_dy;
438
439 span->u.current_value += span->u.step_dy;
440 span->v.current_value += span->v.step_dy;
441 span->r.current_value += span->r.step_dy;
442 span->g.current_value += span->g.step_dy;
443 span->b.current_value += span->b.step_dy;
444}
445
446void decrement_span(_span_struct *span)
447{
448 span->left_x += span->left_dx_dy;
449 span->right_x += span->right_dx_dy;
450
451 span->u.current_value -= span->u.step_dy;
452 span->v.current_value -= span->v.step_dy;
453 span->r.current_value -= span->r.step_dy;
454 span->g.current_value -= span->g.step_dy;
455 span->b.current_value -= span->b.step_dy;
456}
457
458
/*
 * Gradient numerators for one vertex attribute. These macros expect a
 * _span_struct object named `span` and vertex pointers `a`, `b`, `c` in the
 * expanding scope.
 *
 * The x variant substitutes the attribute for the x coordinate in the
 * doubled-area formula, the y variant substitutes it for the y coordinate.
 */
#define compute_gradient_area_x(interpolant)                                   \
do                                                                             \
{                                                                              \
  span.interpolant.gradient_area_x = triangle_signed_area_x2(                  \
   a->interpolant, a->y, b->interpolant, b->y, c->interpolant, c->y);          \
} while(0)

#define compute_gradient_area_y(interpolant)                                   \
do                                                                             \
{                                                                              \
  span.interpolant.gradient_area_y = triangle_signed_area_x2(                  \
   a->x, a->interpolant, b->x, b->interpolant, c->x, c->interpolant);          \
} while(0)

/* Both numerators for every attribute; the caller supplies the final ';'. */
#define compute_all_gradient_areas()                                           \
  compute_gradient_area_x(u);                                                  \
  compute_gradient_area_y(u);                                                  \
  compute_gradient_area_x(v);                                                  \
  compute_gradient_area_y(v);                                                  \
  compute_gradient_area_x(r);                                                  \
  compute_gradient_area_y(r);                                                  \
  compute_gradient_area_x(g);                                                  \
  compute_gradient_area_y(g);                                                  \
  compute_gradient_area_x(b);                                                  \
  compute_gradient_area_y(b)
484
/*
 * Derives one attribute's per-pixel and per-scanline steps by dividing its
 * gradient numerators by the triangle area (through the shared reciprocal),
 * and seeds current_value with the base vertex's value at its centre.
 *
 * Only meant to be expanded inside set_interpolant_bases(), which provides
 * `area_reciprocal` and `reciprocal_shift`.
 */
#define set_interpolant_base(interpolant, base_vertex)                         \
  span->interpolant.step_dx = fixed_reciprocal_multiply(                       \
   span->interpolant.gradient_area_x, area_reciprocal,                         \
   span->triangle_winding, reciprocal_shift);                                  \
  span->interpolant.step_dy = fixed_reciprocal_multiply(                       \
   span->interpolant.gradient_area_y, area_reciprocal,                         \
   span->triangle_winding, reciprocal_shift);                                  \
  span->interpolant.current_value = fixed_center(base_vertex->interpolant)

/*
 * Initializes all five interpolants from `base_vertex` and records the
 * integer part of the left edge as base_x. Expects a `span` pointer in the
 * expanding scope, with the left edge and triangle_area already set.
 *
 * FIXED_BITS is taken off the shift so the quotients keep FIXED_BITS
 * fractional bits.
 */
#define set_interpolant_bases(base_vertex)                                     \
{                                                                              \
  u32 reciprocal_shift;                                                        \
  u32 area_reciprocal =                                                        \
   fixed_reciprocal(span->triangle_area, &reciprocal_shift);                   \
                                                                               \
  reciprocal_shift -= FIXED_BITS;                                              \
                                                                               \
  set_interpolant_base(u, base_vertex);                                        \
  set_interpolant_base(v, base_vertex);                                        \
  set_interpolant_base(r, base_vertex);                                        \
  set_interpolant_base(g, base_vertex);                                        \
  set_interpolant_base(b, base_vertex);                                        \
                                                                               \
  span->base_x = span->left_x >> EDGE_STEP_BITS;                               \
}
506
/*
 * Sets span-><edge>_x and span-><edge>_dx_dy for the edge running from
 * vertex `start` to vertex `end` over `height` scanlines. Both results carry
 * EDGE_STEP_BITS fractional bits:
 *
 *   <edge>_x     ~ start->x + (height - 1) / height
 *   <edge>_dx_dy ~ (end->x - start->x) / height
 *
 * The division is done with a reciprocal: `height` is normalized so its top
 * bit is set, 2^50 is divided by that (rounding up), and the product is
 * scaled back by 2^(clz(height) - (50 - EDGE_STEP_BITS)).
 *
 * `height` must be greater than zero (__builtin_clz(0) is undefined) and
 * small enough for the final scale exponent to be non-negative.
 *
 * Differences from a naive shift-based formulation, all value-preserving:
 * - `height` is converted to u32 before normalizing, so a set top bit never
 *   lands in the sign bit of a signed int;
 * - the final scaling multiplies by a power of two instead of left-shifting
 *   a possibly negative s64, which ISO C leaves undefined;
 * - macro arguments are parenthesized.
 */
#define compute_edge_delta(edge, start, end, height)                           \
{                                                                              \
  s32 x_start = (start)->x;                                                    \
  s32 x_end = (end)->x;                                                        \
  s32 width = x_end - x_start;                                                 \
                                                                               \
  s32 shift = __builtin_clz(height);                                           \
  u32 height_normalized = (u32)(height) << shift;                              \
  u32 height_reciprocal = ((1ULL << 50) + (height_normalized - 1)) /           \
   height_normalized;                                                          \
                                                                               \
  shift -= (50 - EDGE_STEP_BITS);                                              \
                                                                               \
  span->edge##_x =                                                             \
   ((((s64)x_start * (height)) + ((height) - 1)) * height_reciprocal) *        \
   ((s64)1 << shift);                                                          \
  span->edge##_dx_dy =                                                         \
   ((s64)width * height_reciprocal) * ((s64)1 << shift);                       \
}
524
525
/*
 * Scanline loops shared by the span renderers. Both expect `psx_gpu`,
 * `span`, `current_y` and `flags` in the expanding scope, take the name of a
 * row-count variable, and leave that variable at zero. The body always runs
 * at least once, so the count must be greater than zero on entry.
 */

/* Walks upwards: step the span first, then draw, then move to the row above. */
#define render_spans_up(height)                                                \
  do                                                                           \
  {                                                                            \
    decrement_span(span);                                                      \
    render_span(psx_gpu, span, current_y, flags);                              \
    current_y -= 1;                                                            \
  } while(--height)

/* Walks downwards: draw first, then step the span and move to the row below. */
#define render_spans_down(height)                                              \
  do                                                                           \
  {                                                                            \
    render_span(psx_gpu, span, current_y, flags);                              \
    increment_span(span);                                                      \
    current_y += 1;                                                            \
  } while(--height)
543
544#define render_spans_up_up(minor, major) \
545 s32 current_y = bottom->y - 1; \
546 s32 height_minor_a = bottom->y - middle->y; \
547 s32 height_minor_b = middle->y - top->y; \
548 s32 height_major = height_minor_a + height_minor_b; \
549 \
550 compute_edge_delta(major, bottom, top, height_major); \
551 compute_edge_delta(minor, bottom, middle, height_minor_a); \
552 set_interpolant_bases(bottom); \
553 \
554 render_spans_up(height_minor_a); \
555 \
556 compute_edge_delta(minor, middle, top, height_minor_b); \
557 render_spans_up(height_minor_b) \
558
559void render_spans_up_left(psx_gpu_struct *psx_gpu, _span_struct *span,
560 vertex_struct *bottom, vertex_struct *middle, vertex_struct *top, u32 flags)
561{
562 render_spans_up_up(left, right);
563}
564
565void render_spans_up_right(psx_gpu_struct *psx_gpu, _span_struct *span,
566 vertex_struct *bottom, vertex_struct *middle, vertex_struct *top, u32 flags)
567{
568 render_spans_up_up(right, left);
569}
570
571#define render_spans_down_down(minor, major) \
572 s32 current_y = top->y; \
573 s32 height_minor_a = middle->y - top->y; \
574 s32 height_minor_b = bottom->y - middle->y; \
575 s32 height_major = height_minor_a + height_minor_b; \
576 \
577 compute_edge_delta(minor, top, middle, height_minor_a); \
578 compute_edge_delta(major, top, bottom, height_major); \
579 set_interpolant_bases(top); \
580 \
581 render_spans_down(height_minor_a); \
582 \
583 compute_edge_delta(minor, middle, bottom, height_minor_b); \
584 render_spans_down(height_minor_b) \
585
586void render_spans_down_left(psx_gpu_struct *psx_gpu, _span_struct *span,
587 vertex_struct *top, vertex_struct *middle, vertex_struct *bottom, u32 flags)
588{
589 render_spans_down_down(left, right);
590}
591
592void render_spans_down_right(psx_gpu_struct *psx_gpu, _span_struct *span,
593 vertex_struct *top, vertex_struct *middle, vertex_struct *bottom, u32 flags)
594{
595 render_spans_down_down(right, left);
596}
597
598#define render_spans_up_flat(bottom_left, bottom_right, top_left, top_right) \
599 s32 current_y = bottom_left->y - 1; \
600 s32 height = bottom_left->y - top_left->y; \
601 \
602 compute_edge_delta(left, bottom_left, top_left, height); \
603 compute_edge_delta(right, bottom_right, top_right, height); \
604 set_interpolant_bases(bottom_left); \
605 render_spans_up(height) \
606
607void render_spans_up_a(psx_gpu_struct *psx_gpu, _span_struct *span,
608 vertex_struct *bottom_left, vertex_struct *bottom_right, vertex_struct *top,
609 u32 flags)
610{
611 render_spans_up_flat(bottom_left, bottom_right, top, top);
612}
613
614void render_spans_up_b(psx_gpu_struct *psx_gpu, _span_struct *span,
615 vertex_struct *bottom, vertex_struct *top_left, vertex_struct *top_right,
616 u32 flags)
617{
618 render_spans_up_flat(bottom, bottom, top_left, top_right);
619}
620
621#define render_spans_down_flat(top_left, top_right, bottom_left, bottom_right) \
622 s32 current_y = top_left->y; \
623 s32 height = bottom_left->y - top_left->y; \
624 \
625 compute_edge_delta(left, top_left, bottom_left, height); \
626 compute_edge_delta(right, top_right, bottom_right, height); \
627 set_interpolant_bases(top_left); \
628 render_spans_down(height) \
629
630void render_spans_down_a(psx_gpu_struct *psx_gpu, _span_struct *span,
631 vertex_struct *top_left, vertex_struct *top_right, vertex_struct *bottom,
632 u32 flags)
633{
634 render_spans_down_flat(top_left, top_right, bottom, bottom);
635}
636
637void render_spans_down_b(psx_gpu_struct *psx_gpu, _span_struct *span,
638 vertex_struct *top, vertex_struct *bottom_left, vertex_struct *bottom_right,
639 u32 flags)
640{
641 render_spans_down_flat(top, top, bottom_left, bottom_right);
642}
643
644void render_spans_up_down(psx_gpu_struct *psx_gpu, _span_struct *span,
645 vertex_struct *middle, vertex_struct *top, vertex_struct *bottom, u32 flags)
646{
647 s32 middle_y = middle->y;
648 s32 current_y = middle_y - 1;
649 s32 height_minor_a = middle->y - top->y;
650 s32 height_minor_b = bottom->y - middle->y;
651 s32 height_major = height_minor_a + height_minor_b;
652
653 u64 right_x_mid;
654
655 compute_edge_delta(left, middle, top, height_minor_a);
656 compute_edge_delta(right, bottom, top, height_major);
657 set_interpolant_bases(middle);
658
659 right_x_mid = span->right_x + (span->right_dx_dy * height_minor_b);
660 span->right_x = right_x_mid;
661
662 render_spans_up(height_minor_a);
663
664 compute_edge_delta(left, middle, bottom, height_minor_b);
665 set_interpolant_bases(middle);
666
667 span->right_dx_dy *= -1;
668 span->right_x = right_x_mid;
669 current_y = middle_y;
670
671 render_spans_down(height_minor_b);
672}
673
/*
 * Exchanges two vertex pointers and flips `triangle_winding`, which must be
 * a variable in the expanding scope.
 */
#define vertex_swap(_a, _b)                                                    \
do                                                                             \
{                                                                              \
  vertex_struct *swapped_vertex = _a;                                          \
  _a = _b;                                                                     \
  _b = swapped_vertex;                                                         \
  triangle_winding ^= 1;                                                       \
} while(0)

/* Classification of a y delta: sign bit set -> up, zero -> flat, else down. */
#define triangle_y_direction_down 0
#define triangle_y_direction_up   1
#define triangle_y_direction_flat 2

#define triangle_winding_positive 0
#define triangle_winding_negative 1

/* Declares `direction_variable` and sets it to the classification of `value`. */
#define triangle_set_direction(direction_variable, value)                      \
  u32 direction_variable = ((value) == 0) ?                                    \
   triangle_y_direction_flat : ((u32)(value) >> 31)

/*
 * Builds the `case` label matching render_triangle()'s switch key: three
 * 2-bit direction codes followed by the winding bit. The ':' is written at
 * the point of use.
 */
#define triangle_case(direction_a, direction_b, direction_c, winding)          \
  case ((triangle_winding_##winding << 6) |                                    \
   (triangle_y_direction_##direction_c << 4) |                                 \
   (triangle_y_direction_##direction_b << 2) |                                 \
   triangle_y_direction_##direction_a)
700
701
702void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
703 u32 flags)
704{
705 s32 triangle_area;
706 u32 triangle_winding = 0;
707 _span_struct span;
708
709 vertex_struct *a = &(vertexes[0]);
710 vertex_struct *b = &(vertexes[1]);
711 vertex_struct *c = &(vertexes[2]);
712
713 triangle_area = triangle_signed_area_x2(a->x, a->y, b->x, b->y, c->x, c->y);
714
715 triangles++;
716
717 if(triangle_area == 0)
718 return;
719
720 if(b->y < a->y)
721 vertex_swap(a, b);
722
723 if(c->y < b->y)
724 {
725 vertex_swap(b, c);
726
727 if(b->y < a->y)
728 vertex_swap(a, b);
729 }
730
731 if((c->y - a->y) >= 512)
732 return;
733
734 if(triangle_area < 0)
735 {
736 triangle_area = -triangle_area;
737 triangle_winding ^= 1;
738 vertex_swap(a, c);
739 }
740
741 if(b->x < a->x)
742 vertex_swap(a, b);
743
744 if(c->x < b->x)
745 {
746 vertex_swap(b, c);
747
748 if(b->x < a->x)
749 vertex_swap(a, b);
750 }
751
752 if((c->x - a->x) >= 1024)
753 return;
754
755 s32 y_delta_a = b->y - a->y;
756 s32 y_delta_b = c->y - b->y;
757 s32 y_delta_c = c->y - a->y;
758
759 triangle_set_direction(y_direction_a, y_delta_a);
760 triangle_set_direction(y_direction_b, y_delta_b);
761 triangle_set_direction(y_direction_c, y_delta_c);
762
763 compute_all_gradient_areas();
764 span.triangle_area = triangle_area;
765 span.triangle_winding = triangle_winding;
766
767 switch(y_direction_a | (y_direction_b << 2) | (y_direction_c << 4) |
768 (triangle_winding << 6))
769 {
770 triangle_case(up, up, up, negative):
771 triangle_case(up, up, flat, negative):
772 triangle_case(up, up, down, negative):
773 render_spans_up_right(psx_gpu, &span, a, b, c, flags);
774 break;
775
776 triangle_case(flat, up, up, negative):
777 triangle_case(flat, up, flat, negative):
778 triangle_case(flat, up, down, negative):
779 render_spans_up_a(psx_gpu, &span, a, b, c, flags);
780 break;
781
782 triangle_case(down, up, up, negative):
783 render_spans_up_down(psx_gpu, &span, a, c, b, flags);
784 break;
785
786 triangle_case(down, up, flat, negative):
787 render_spans_down_a(psx_gpu, &span, a, c, b, flags);
788 break;
789
790 triangle_case(down, up, down, negative):
791 render_spans_down_right(psx_gpu, &span, a, c, b, flags);
792 break;
793
794 triangle_case(down, flat, up, negative):
795 triangle_case(down, flat, flat, negative):
796 triangle_case(down, flat, down, negative):
797 render_spans_down_b(psx_gpu, &span, a, b, c, flags);
798 break;
799
800 triangle_case(down, down, up, negative):
801 triangle_case(down, down, flat, negative):
802 triangle_case(down, down, down, negative):
803 render_spans_down_left(psx_gpu, &span, a, b, c, flags);
804 break;
805
806 triangle_case(up, up, up, positive):
807 triangle_case(up, up, flat, positive):
808 triangle_case(up, up, down, positive):
809 render_spans_up_left(psx_gpu, &span, a, b, c, flags);
810 break;
811
812 triangle_case(up, flat, up, positive):
813 triangle_case(up, flat, flat, positive):
814 triangle_case(up, flat, down, positive):
815 render_spans_up_b(psx_gpu, &span, a, b, c, flags);
816 break;
817
818 triangle_case(up, down, up, positive):
819 render_spans_up_right(psx_gpu, &span, a, c, b, flags);
820 break;
821
822 triangle_case(up, down, flat, positive):
823 render_spans_up_a(psx_gpu, &span, a, c, b, flags);
824 break;
825
826 triangle_case(up, down, down, positive):
827 render_spans_up_down(psx_gpu, &span, a, b, c, flags);
828 break;
829
830 triangle_case(flat, down, up, positive):
831 triangle_case(flat, down, flat, positive):
832 triangle_case(flat, down, down, positive):
833 render_spans_down_a(psx_gpu, &span, a, b, c, flags);
834 break;
835
836 triangle_case(down, down, up, positive):
837 triangle_case(down, down, flat, positive):
838 triangle_case(down, down, down, positive):
839 render_spans_down_right(psx_gpu, &span, a, b, c, flags);
840 break;
841 }
842
843}
844
845
846void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v,
847 s32 width, s32 height, u32 flags)
848{
849 // TODO: Flip/mirror
850 s32 current_x, current_y;
851 u32 current_u, current_v;
852 u32 primitive_color = psx_gpu->primitive_color;
853 u32 sprite_r, sprite_g, sprite_b;
854 s32 color_r = 0;
855 s32 color_g = 0;
856 s32 color_b = 0;
857 u32 texel = 0;
858
859 sprite_r = primitive_color & 0xFF;
860 sprite_g = (primitive_color >> 8) & 0xFF;
861 sprite_b = (primitive_color >> 16) & 0xFF;
862
863 static u32 sprites = 0;
864
865 sprites++;
866
867 for(current_y = y, current_v = v;
868 current_y < y + height; current_y++, current_v++)
869 {
870 for(current_x = x, current_u = u;
871 current_x < x + width; current_x++, current_u++)
872 {
873 if((current_x >= psx_gpu->viewport_start_x) &&
874 (current_y >= psx_gpu->viewport_start_y) &&
875 (current_x <= psx_gpu->viewport_end_x) &&
876 (current_y <= psx_gpu->viewport_end_y))
877 {
878 if(psx_gpu->mask_evaluate &&
879 (psx_gpu->vram[(y * 1024) + current_x] & 0x8000))
880 {
881 continue;
882 }
883
884 if(flags & RENDER_FLAGS_TEXTURE_MAP)
885 {
886 texel = fetch_texel(psx_gpu, current_u, current_v);
887 if(texel == 0)
888 continue;
889
890 color_r = texel & 0x1F;
891 color_g = (texel >> 5) & 0x1F;
892 color_b = (texel >> 10) & 0x1F;
893
894 if((flags & RENDER_FLAGS_MODULATE_TEXELS) == 0)
895 {
896 color_r *= sprite_r;
897 color_g *= sprite_g;
898 color_b *= sprite_b;
899
900 color_r >>= 7;
901 color_g >>= 7;
902 color_b >>= 7;
903 }
904 }
905 else
906 {
907 color_r = sprite_r >> 3;
908 color_g = sprite_g >> 3;
909 color_b = sprite_b >> 3;
910 }
911
912 draw_pixel(psx_gpu, color_r, color_g, color_b, texel, current_x,
913 current_y, flags);
914 }
915 }
916 }
917}
918
919
/*
 * Draws one pixel of a line at (_x, _y) if it lies inside the viewport, then
 * advances the shaded colour by one step.
 *
 * Expects psx_gpu, flags, primitive_color, color_r/g/b, current_r/g/b and
 * gradient_r/g/b in the expanding scope.
 *
 * The colour is advanced for every pixel position, visible or not: the
 * gradient is defined per step along the line, so a pixel clipped by the
 * viewport must still consume its step or the visible part of the line would
 * be drawn with colours belonging to earlier positions.
 */
#define draw_pixel_line(_x, _y)                                                \
do                                                                             \
{                                                                              \
  if(((_x) >= psx_gpu->viewport_start_x) &&                                    \
   ((_y) >= psx_gpu->viewport_start_y) &&                                      \
   ((_x) <= psx_gpu->viewport_end_x) &&                                        \
   ((_y) <= psx_gpu->viewport_end_y))                                          \
  {                                                                            \
    if(flags & RENDER_FLAGS_SHADE)                                             \
    {                                                                          \
      color_r = fixed_to_int(current_r);                                       \
      color_g = fixed_to_int(current_g);                                       \
      color_b = fixed_to_int(current_b);                                       \
    }                                                                          \
    else                                                                       \
    {                                                                          \
      color_r = primitive_color & 0xFF;                                        \
      color_g = (primitive_color >> 8) & 0xFF;                                 \
      color_b = (primitive_color >> 16) & 0xFF;                                \
    }                                                                          \
                                                                               \
    if(psx_gpu->dither_mode)                                                   \
    {                                                                          \
      s32 dither_offset = dither_table[(_y) % 4][(_x) % 4];                    \
                                                                               \
      color_r += dither_offset;                                                \
      color_g += dither_offset;                                                \
      color_b += dither_offset;                                                \
                                                                               \
      if(color_r < 0)                                                          \
        color_r = 0;                                                           \
                                                                               \
      if(color_g < 0)                                                          \
        color_g = 0;                                                           \
                                                                               \
      if(color_b < 0)                                                          \
        color_b = 0;                                                           \
    }                                                                          \
                                                                               \
    color_r >>= 3;                                                             \
    color_g >>= 3;                                                             \
    color_b >>= 3;                                                             \
                                                                               \
    span_pixels++;                                                             \
                                                                               \
    draw_pixel(psx_gpu, color_r, color_g, color_b, 0, (_x), (_y), flags);      \
  }                                                                            \
                                                                               \
  if(flags & RENDER_FLAGS_SHADE)                                               \
  {                                                                            \
    current_r += gradient_r;                                                   \
    current_g += gradient_g;                                                   \
    current_b += gradient_b;                                                   \
  }                                                                            \
} while(0)
966
/*
 * Direction helpers selected by token pasting (update_##direction,
 * compare_##direction) in the line span macros.
 */

/* Step a coordinate one pixel forwards / backwards. */
#define update_increment(value)                                                \
  (value)++

#define update_decrement(value)                                                \
  (value)--

/* Loop conditions: true while `a` has not passed the end coordinate `b`. */
#define compare_increment(a, b)                                                \
  ((a) <= (b))

#define compare_decrement(a, b)                                                \
  ((a) >= (b))
978
/*
 * Prepares colour interpolation along a line: the per-step gradient of each
 * channel is the colour difference between vertex_b and vertex_a divided by
 * delta_<minor>, and the running colours start at vertex_a's colour.
 *
 * Expects vertex_a, vertex_b, delta_x / delta_y, gradient_r/g/b and
 * current_r/g/b in the expanding scope.
 *
 * A line whose two vertices coincide has a divisor of zero. Dividing by it
 * would be undefined behaviour (typically a crash), so such a line gets zero
 * gradients; it is drawn as a single pixel in vertex_a's colour.
 */
#define set_line_gradients(minor)                                              \
{                                                                              \
  s32 gradient_divisor = delta_##minor;                                        \
                                                                               \
  if(gradient_divisor != 0)                                                    \
  {                                                                            \
    gradient_r = int_to_fixed(vertex_b->r - vertex_a->r) / gradient_divisor;   \
    gradient_g = int_to_fixed(vertex_b->g - vertex_a->g) / gradient_divisor;   \
    gradient_b = int_to_fixed(vertex_b->b - vertex_a->b) / gradient_divisor;   \
  }                                                                            \
  else                                                                         \
  {                                                                            \
    gradient_r = 0;                                                            \
    gradient_g = 0;                                                            \
    gradient_b = 0;                                                            \
  }                                                                            \
                                                                               \
  current_r = fixed_center(vertex_a->r);                                       \
  current_g = fixed_center(vertex_a->g);                                       \
  current_b = fixed_center(vertex_a->b);                                       \
}
989
/*
 * Draws a line that is wider than it is tall: x runs from x_a to x_b
 * inclusive, and y moves one row in `direction` (increment or decrement)
 * each time the accumulated error reaches error_wrap.
 *
 * Expects the locals of render_line() in the expanding scope.
 */
#define draw_line_span_horizontal(direction)                                   \
do                                                                             \
{                                                                              \
  set_line_gradients(x);                                                       \
                                                                               \
  error = delta_x;                                                             \
  error_step = delta_y * 2;                                                    \
  error_wrap = delta_x * 2;                                                    \
                                                                               \
  current_x = x_a;                                                             \
  current_y = y_a;                                                             \
                                                                               \
  while(current_x <= x_b)                                                      \
  {                                                                            \
    draw_pixel_line(current_x, current_y);                                     \
    error += error_step;                                                       \
                                                                               \
    if(error >= error_wrap)                                                    \
    {                                                                          \
      error -= error_wrap;                                                     \
      update_##direction(current_y);                                           \
    }                                                                          \
                                                                               \
    current_x++;                                                               \
  }                                                                            \
} while(0)
1012
/*
 * Draws a line that is at least as tall as it is wide: y runs from y_a to
 * y_b inclusive in `direction` (increment or decrement), and x moves one
 * column to the right each time the accumulated error exceeds error_wrap.
 *
 * Expects the locals of render_line() in the expanding scope.
 */
#define draw_line_span_vertical(direction)                                     \
do                                                                             \
{                                                                              \
  set_line_gradients(y);                                                       \
                                                                               \
  error = delta_y;                                                             \
  error_step = delta_x * 2;                                                    \
  error_wrap = delta_y * 2;                                                    \
                                                                               \
  current_x = x_a;                                                             \
  current_y = y_a;                                                             \
                                                                               \
  while(compare_##direction(current_y, y_b))                                   \
  {                                                                            \
    draw_pixel_line(current_x, current_y);                                     \
    error += error_step;                                                       \
                                                                               \
    if(error > error_wrap)                                                     \
    {                                                                          \
      error -= error_wrap;                                                     \
      current_x++;                                                             \
    }                                                                          \
                                                                               \
    update_##direction(current_y);                                             \
  }                                                                            \
} while(0)
1036
1037void render_line(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 flags)
1038{
1039 u32 primitive_color = psx_gpu->primitive_color;
1040 s32 color_r, color_g, color_b;
1041
1042 fixed_type gradient_r = 0;
1043 fixed_type gradient_g = 0;
1044 fixed_type gradient_b = 0;
1045 fixed_type current_r = 0;
1046 fixed_type current_g = 0;
1047 fixed_type current_b = 0;
1048
1049 s32 y_a, y_b;
1050 s32 x_a, x_b;
1051
1052 s32 delta_x, delta_y;
1053 u32 triangle_winding = 0;
1054
1055 s32 current_x;
1056 s32 current_y;
1057
1058 u32 error_step;
1059 u32 error;
1060 u32 error_wrap;
1061
1062 vertex_struct *vertex_a = &(vertexes[0]);
1063 vertex_struct *vertex_b = &(vertexes[1]);
1064
1065 if(vertex_a->x >= vertex_b->x)
1066 {
1067 vertex_swap(vertex_a, vertex_b);
1068 }
1069
1070 x_a = vertex_a->x;
1071 x_b = vertex_b->x;
1072
1073 y_a = vertex_a->y;
1074 y_b = vertex_b->y;
1075
1076 delta_x = x_b - x_a;
1077 delta_y = y_b - y_a;
1078
1079 if(delta_x >= 1024)
1080 return;
1081
1082 flags &= ~RENDER_FLAGS_TEXTURE_MAP;
1083
1084 if(delta_y < 0)
1085 {
1086 delta_y *= -1;
1087
1088 if(delta_y >= 512)
1089 return;
1090
1091 if(delta_x > delta_y)
1092 draw_line_span_horizontal(decrement);
1093 else
1094 draw_line_span_vertical(decrement);
1095 }
1096 else
1097 {
1098 if(delta_y >= 512)
1099 return;
1100
1101 if(delta_x > delta_y)
1102 draw_line_span_horizontal(increment);
1103 else
1104 draw_line_span_vertical(increment);
1105 }
1106}
1107
1108
1109void render_block_fill(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y,
1110 u32 width, u32 height)
1111{
1112 u32 r = color & 0xFF;
1113 u32 g = (color >> 8) & 0xFF;
1114 u32 b = (color >> 16) & 0xFF;
1115 u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10);
1116
1117 u16 *vram_ptr = psx_gpu->vram + x + (y * 1024);
1118 u32 draw_x, draw_y;
1119
1120 for(draw_y = 0; draw_y < height; draw_y++)
1121 {
1122 for(draw_x = 0; draw_x < width; draw_x++)
1123 {
1124 vram_ptr[draw_x] = color_16bpp;
1125 }
1126
1127 vram_ptr += 1024;
1128 }
1129}
1130
1131void render_block_copy(psx_gpu_struct *psx_gpu, u16 *source, u32 x, u32 y,
1132 u32 width, u32 height, u32 pitch)
1133{
1134 u16 *vram_ptr = psx_gpu->vram + x + (y * 1024);
1135 u32 draw_x, draw_y;
1136
1137 for(draw_y = 0; draw_y < height; draw_y++)
1138 {
1139 for(draw_x = 0; draw_x < width; draw_x++)
1140 {
1141 vram_ptr[draw_x] = source[draw_x];
1142 }
1143
1144 source += pitch;
1145 vram_ptr += 1024;
1146 }
1147}
1148
1149void render_block_move(psx_gpu_struct *psx_gpu, u32 source_x, u32 source_y,
1150 u32 dest_x, u32 dest_y, u32 width, u32 height)
1151{
1152 render_block_copy(psx_gpu, psx_gpu->vram + source_x + (source_y * 1024),
1153 dest_x, dest_y, width, height, 1024);
1154}
1155
1156void initialize_psx_gpu(psx_gpu_struct *psx_gpu)
1157{
1158 psx_gpu->pixel_count_mode = 0;
1159 psx_gpu->pixel_compare_mode = 0;
1160
1161 psx_gpu->vram_pixel_counts_a = malloc(sizeof(u8) * 1024 * 512);
1162 psx_gpu->vram_pixel_counts_b = malloc(sizeof(u8) * 1024 * 512);
1163 memset(psx_gpu->vram_pixel_counts_a, 0, sizeof(u8) * 1024 * 512);
1164 memset(psx_gpu->vram_pixel_counts_b, 0, sizeof(u8) * 1024 * 512);
1165 psx_gpu->compare_vram = malloc(sizeof(u16) * 1024 * 512);
1166}