Merge pull request #123 from gameblabla/diablofix_hack
[pcsx_rearmed.git] / plugins / gpu_neon / psx_gpu / psx_gpu_standard.c
1 /*
2  * Copyright (C) 2011 Gilead Kutnick "Exophase" <exophase@gmail.com>
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License as
6  * published by the Free Software Foundation; either version 2 of
7  * the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  */
14
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <malloc.h>
18 #include <math.h>
19
20 #include "common.h"
21
/* Signed fixed-point value carrying FIXED_BITS fractional bits. */
typedef s32 fixed_type;

/* Fractional bits of the 64-bit span edge positions (left_x / right_x). */
#define EDGE_STEP_BITS 32
/* Fractional bits of fixed_type. */
#define FIXED_BITS     12

/*
 * All macro arguments are parenthesized so that compound expressions are
 * converted as a whole, and integer -> fixed conversion multiplies instead
 * of left-shifting so that negative inputs are well defined.
 */

/* Integer -> fixed-point, offset by one half (the sample center). */
#define fixed_center(value)                                                    \
  (((fixed_type)(value) * (1 << FIXED_BITS)) + (1 << (FIXED_BITS - 1)))

/* Integer -> fixed-point. */
#define int_to_fixed(value)                                                    \
  ((fixed_type)(value) * (1 << FIXED_BITS))

/* Fixed-point -> integer; drops the fractional bits with a right shift. */
#define fixed_to_int(value)                                                    \
  ((value) >> FIXED_BITS)

/* Fixed-point multiply using a 64-bit intermediate product. */
#define fixed_mul(_a, _b)                                                      \
  (((s64)(_a) * (_b)) >> FIXED_BITS)

/* Fixed-point -> double. */
#define fixed_to_double(value)                                                 \
  ((value) / (double)(1 << FIXED_BITS))

/* Double -> fixed-point (truncating conversion). */
#define double_to_fixed(value)                                                 \
  ((fixed_type)((value) * (double)(1 << FIXED_BITS)))
44
/*
 * Interpolation state for one vertex attribute (u, v, r, g or b) of the
 * triangle currently being rasterized.
 */
typedef struct
{
  /* Attribute value at span->base_x on the current scanline. */
  fixed_type current_value;

  /* Change in the attribute per pixel in x and per scanline in y. */
  fixed_type step_dx, step_dy;

  /* Signed-area terms the two steps are derived from. */
  fixed_type gradient_area_x, gradient_area_y;
} interpolant_struct;
53
54 typedef struct
55 {
56   s32 base_x;
57
58   s64 left_x;
59   s64 left_dx_dy;
60
61   s64 right_x;
62   s64 right_dx_dy;
63
64   u32 triangle_area;
65   u32 triangle_winding;
66
67   interpolant_struct u;
68   interpolant_struct v;
69   interpolant_struct r;
70   interpolant_struct g;
71   interpolant_struct b;
72 } _span_struct;
73
74
/* Rasterizer statistics counters. */

/* Span and triangle totals, updated by render_span / render_triangle. */
u32 span_pixels = 0, span_pixel_blocks = 0;
u32 spans = 0, triangles = 0;

/* Texel fetches per texture mode and per-pixel classifications. */
u32 texels_4bpp = 0, texels_8bpp = 0, texels_16bpp = 0;
u32 untextured_pixels = 0, blend_pixels = 0, transparent_pixels = 0;

/* Not written anywhere in the visible part of this file. */
u32 state_changes = 0, render_buffer_flushes = 0, trivial_rejects = 0;
90
91 void flush_render_block_buffer(psx_gpu_struct *psx_gpu)
92 {
93
94 }
95
96
/*
 * Compute a scaled reciprocal of denominator.
 *
 * The denominator is normalized so its top bit is set, then 2^62 is divided
 * by it (rounding up) using a double-precision divide. The result satisfies
 *   (x * reciprocal) >> *_shift  ~=  x / denominator
 *
 * denominator must be non-zero (__builtin_clz(0) is undefined).
 * _shift receives the right shift that goes with the returned reciprocal.
 */
u32 fixed_reciprocal(u32 denominator, u32 *_shift)
{
  const u32 leading_zeros = __builtin_clz(denominator);
  const u32 normalized = denominator << leading_zeros;

  // Implement with a DP divide
  const double dividend = (double)((1ULL << 62) + (normalized - 1));
  const u32 result = dividend / (double)normalized;

  *_shift = 62 - leading_zeros;
  return result;
}
110
/*
 * Multiply a signed numerator by an unsigned reciprocal produced by
 * fixed_reciprocal() and apply the accompanying right shift.
 *
 * numerator        signed value to scale
 * reciprocal       unsigned reciprocal magnitude
 * reciprocal_sign  sign of the reciprocal; must be 0 or 1
 * shift            right shift applied to the 64-bit product
 *
 * Returns the product, negated when the numerator's sign differs from
 * reciprocal_sign.
 */
fixed_type fixed_reciprocal_multiply(s32 numerator, u32 reciprocal,
 u32 reciprocal_sign, u32 shift)
{
  u32 numerator_sign = (u32)numerator >> 31;
  u32 flip_sign = numerator_sign ^ reciprocal_sign;
  /* All ones when the result must be negated, zero otherwise. */
  u32 flip_sign_mask = ~(flip_sign - 1);
  /*
   * Take the magnitude in unsigned arithmetic: abs() is undefined for the
   * most negative s32, and its usual result would sign-extend below.
   */
  u32 magnitude = numerator_sign ? (0u - (u32)numerator) : (u32)numerator;
  fixed_type value;

  value = ((u64)magnitude * reciprocal) >> shift;

  /* Branchless conditional negate: (value ^ mask) - mask. */
  value ^= flip_sign_mask;
  value -= flip_sign_mask;

  return value;
}
128
/*
 * Twice the signed area of the triangle (x0,y0) (x1,y1) (x2,y2), computed
 * as the cross product of the edges 0->1 and 1->2. The sign depends on the
 * vertex order; collinear points give zero.
 */
s32 triangle_signed_area_x2(s32 x0, s32 y0, s32 x1, s32 y1, s32 x2, s32 y2)
{
  s32 edge_a_dx = x1 - x0;
  s32 edge_a_dy = y1 - y0;
  s32 edge_b_dx = x2 - x1;
  s32 edge_b_dy = y2 - y1;

  return (edge_a_dx * edge_b_dy) - (edge_b_dx * edge_a_dy);
}
133
134 u32 fetch_texel_4bpp(psx_gpu_struct *psx_gpu, u32 u, u32 v)
135 {
136   u8 *texture_ptr_8bpp = psx_gpu->texture_page_ptr;
137   u32 texel = texture_ptr_8bpp[(v * 2048) + (u / 2)];
138
139   if(u & 1)
140     texel >>= 4;
141   else
142     texel &= 0xF;
143
144   texels_4bpp++;
145
146   return psx_gpu->clut_ptr[texel];
147 }
148
149 u32 fetch_texel_8bpp(psx_gpu_struct *psx_gpu, u32 u, u32 v)
150 {
151   u8 *texture_ptr_8bpp = psx_gpu->texture_page_ptr;
152   u32 texel = texture_ptr_8bpp[(v * 2048) + u];
153
154   texels_8bpp++;
155
156   return psx_gpu->clut_ptr[texel];
157 }
158
159 u32 fetch_texel_16bpp(psx_gpu_struct *psx_gpu, u32 u, u32 v)
160 {
161   u16 *texture_ptr_16bpp = psx_gpu->texture_page_ptr;
162
163   texels_16bpp++;
164
165   return texture_ptr_16bpp[(v * 1024) + u];
166 }
167
168 u32 fetch_texel(psx_gpu_struct *psx_gpu, u32 u, u32 v)
169 {
170   u &= psx_gpu->texture_mask_width;
171   v &= psx_gpu->texture_mask_height;
172
173   switch(psx_gpu->texture_mode)
174   {
175     case TEXTURE_MODE_4BPP:
176       return fetch_texel_4bpp(psx_gpu, u, v);
177
178     case TEXTURE_MODE_8BPP:
179       return fetch_texel_8bpp(psx_gpu, u, v);
180
181     case TEXTURE_MODE_16BPP:
182       return fetch_texel_16bpp(psx_gpu, u, v);
183   }
184
185   return 0;
186 }
187
188 void draw_pixel(psx_gpu_struct *psx_gpu, s32 r, s32 g, s32 b, u32 texel,
189  u32 x, u32 y, u32 flags)
190 {
191   u32 pixel;
192
193   if(r > 31)
194     r = 31;
195
196   if(g > 31)
197     g = 31;
198
199   if(b > 31)
200     b = 31;
201
202   if(flags & RENDER_FLAGS_BLEND)
203   {
204     if(((flags & RENDER_FLAGS_TEXTURE_MAP) == 0) || (texel & 0x8000))
205     {
206       s32 fb_pixel = psx_gpu->vram[(y * 1024) + x];
207       s32 fb_r = fb_pixel & 0x1F;
208       s32 fb_g = (fb_pixel >> 5) & 0x1F;
209       s32 fb_b = (fb_pixel >> 10) & 0x1F;
210
211       blend_pixels++;
212
213       switch(psx_gpu->blend_mode)
214       {
215         case BLEND_MODE_AVERAGE:
216           r = (r + fb_r) / 2;
217           g = (g + fb_g) / 2;
218           b = (b + fb_b) / 2;
219           break;
220
221         case BLEND_MODE_ADD:
222           r += fb_r;
223           g += fb_g;
224           b += fb_b;
225
226           if(r > 31)
227             r = 31;
228
229           if(g > 31)
230             g = 31;
231
232           if(b > 31)
233             b = 31;
234
235           break;
236
237         case BLEND_MODE_SUBTRACT:
238           r = fb_r - r;
239           g = fb_g - g;
240           b = fb_b - b;
241
242           if(r < 0)
243             r = 0;
244
245           if(g < 0)
246             g = 0;
247
248           if(b < 0)
249             b = 0;
250
251           break;
252
253         case BLEND_MODE_ADD_FOURTH:
254           r = fb_r + (r / 4);
255           g = fb_g + (g / 4);
256           b = fb_b + (b / 4);
257
258           if(r > 31)
259             r = 31;
260
261           if(g > 31)
262             g = 31;
263
264           if(b > 31)
265             b = 31;
266
267           break;      
268       }
269     }
270   }
271
272   pixel = r | (g << 5) | (b << 10);
273
274   if(psx_gpu->mask_apply || (texel & 0x8000))
275     pixel |= 0x8000;
276
277   psx_gpu->vram[(y * 1024) + x] = pixel;
278 }
279
/*
 * 4x4 ordered-dither offsets, indexed as dither_table[y % 4][x % 4].
 * render_span adds the offset to each 8-bit channel before it is reduced
 * to 5 bits.
 */
s32 dither_table[4][4] =
{
  /* x % 4:  0    1    2    3 */
  {         -4,   0,  -3,   1 },  /* y % 4 == 0 */
  {          2,  -2,   3,  -1 },  /* y % 4 == 1 */
  {         -3,   1,  -4,   0 },  /* y % 4 == 2 */
  {          3,  -1,   2,  -2 }   /* y % 4 == 3 */
};
287
288 void render_span(psx_gpu_struct *psx_gpu, _span_struct *span, s32 y,
289  u32 flags)
290 {
291   s32 left_x = span->left_x >> EDGE_STEP_BITS;
292   s32 right_x = span->right_x >> EDGE_STEP_BITS;
293   s32 current_x = left_x;
294   s32 delta_x;
295
296   fixed_type current_u = span->u.current_value;
297   fixed_type current_v = span->v.current_value;
298   fixed_type current_r = span->r.current_value;
299   fixed_type current_g = span->g.current_value;
300   fixed_type current_b = span->b.current_value;
301
302   if(y < psx_gpu->viewport_start_y)
303     return;
304
305   if(y > psx_gpu->viewport_end_y)
306     return;
307
308   if(right_x < psx_gpu->viewport_start_x)
309     return;
310
311   if(current_x > psx_gpu->viewport_end_x)
312     return;
313
314   spans++;
315
316   if(current_x < psx_gpu->viewport_start_x)
317     current_x = psx_gpu->viewport_start_x;  
318
319   if(right_x > psx_gpu->viewport_end_x + 1)
320     right_x = psx_gpu->viewport_end_x + 1;
321
322   delta_x = current_x - span->base_x;
323
324   current_u += delta_x * span->u.step_dx;
325   current_v += delta_x * span->v.step_dx;
326   current_r += delta_x * span->r.step_dx;
327   current_g += delta_x * span->g.step_dx;
328   current_b += delta_x * span->b.step_dx;
329
330   span_pixels += right_x - current_x;
331   span_pixel_blocks += ((right_x / 8) - (current_x / 8)) + 1;
332
333   while(current_x < right_x)
334   {
335     s32 color_r, color_g, color_b;
336     u32 texel = 0;
337
338     if(psx_gpu->mask_evaluate &&
339      (psx_gpu->vram[(y * 1024) + current_x] & 0x8000))
340     {
341       goto skip_pixel;
342     }
343
344     if(flags & RENDER_FLAGS_SHADE)
345     {
346       color_r = fixed_to_int(current_r);
347       color_g = fixed_to_int(current_g);
348       color_b = fixed_to_int(current_b);
349     }
350     else
351     {
352       color_r = psx_gpu->primitive_color & 0xFF;
353       color_g = (psx_gpu->primitive_color >> 8) & 0xFF;
354       color_b = (psx_gpu->primitive_color >> 16) & 0xFF;
355     }      
356
357     if(flags & RENDER_FLAGS_TEXTURE_MAP)
358     {
359       u32 texel_r, texel_g, texel_b;
360       u32 u = fixed_to_int(current_u);
361       u32 v = fixed_to_int(current_v);
362
363       texel = fetch_texel(psx_gpu, u, v);
364
365       if(texel == 0)
366       {
367         transparent_pixels++;
368         goto skip_pixel;
369       }
370
371       texel_r = texel & 0x1F;
372       texel_g = (texel >> 5) & 0x1F;
373       texel_b = (texel >> 10) & 0x1F;
374
375       if((flags & RENDER_FLAGS_MODULATE_TEXELS) == 0)
376       {
377         color_r *= texel_r;
378         color_g *= texel_g;
379         color_b *= texel_b;
380       }
381       else
382       {
383         color_r = texel_r << 7;
384         color_g = texel_g << 7;
385         color_b = texel_b << 7;
386       }
387
388       color_r >>= 4;
389       color_g >>= 4;
390       color_b >>= 4;
391     }
392     else
393     {
394       untextured_pixels++;
395     }
396
397     if(psx_gpu->dither_mode && ((flags & RENDER_FLAGS_SHADE) ||
398      ((flags & RENDER_FLAGS_TEXTURE_MAP) &&
399      ((flags & RENDER_FLAGS_MODULATE_TEXELS) == 0))))
400     {
401       s32 dither_offset = dither_table[y % 4][current_x % 4];
402       color_r += dither_offset;
403       color_g += dither_offset;
404       color_b += dither_offset;
405
406       if(color_r < 0)
407         color_r = 0;
408   
409       if(color_g < 0)
410         color_g = 0;
411   
412       if(color_b < 0)
413         color_b = 0;
414     }
415
416     color_r >>= 3;
417     color_g >>= 3;
418     color_b >>= 3;
419
420     draw_pixel(psx_gpu, color_r, color_g, color_b, texel, current_x, y, flags);
421
422   skip_pixel:
423   
424     current_u += span->u.step_dx;
425     current_v += span->v.step_dx;
426     current_r += span->r.step_dx;
427     current_g += span->g.step_dx;
428     current_b += span->b.step_dx;
429
430     current_x++;
431   }
432 }
433
434 void increment_span(_span_struct *span)
435 {
436   span->left_x += span->left_dx_dy;
437   span->right_x += span->right_dx_dy;
438
439   span->u.current_value += span->u.step_dy;
440   span->v.current_value += span->v.step_dy;
441   span->r.current_value += span->r.step_dy;
442   span->g.current_value += span->g.step_dy;
443   span->b.current_value += span->b.step_dy;
444 }
445
446 void decrement_span(_span_struct *span)
447 {
448   span->left_x += span->left_dx_dy;
449   span->right_x += span->right_dx_dy;
450
451   span->u.current_value -= span->u.step_dy;
452   span->v.current_value -= span->v.step_dy;
453   span->r.current_value -= span->r.step_dy;
454   span->g.current_value -= span->g.step_dy;
455   span->b.current_value -= span->b.step_dy;
456 }
457
458
/*
 * Gradient-area helpers. They expect a local _span_struct named `span` and
 * vertex pointers `a`, `b` and `c` in the expanding scope.
 */

/* Doubled signed area of the triangle in (interpolant, y) space. */
#define compute_gradient_area_x(interpolant)                                   \
{                                                                              \
  span.interpolant.gradient_area_x =                                           \
   triangle_signed_area_x2(                                                    \
    a->interpolant, a->y,                                                      \
    b->interpolant, b->y,                                                      \
    c->interpolant, c->y);                                                     \
}

/* Doubled signed area of the triangle in (x, interpolant) space. */
#define compute_gradient_area_y(interpolant)                                   \
{                                                                              \
  span.interpolant.gradient_area_y =                                           \
   triangle_signed_area_x2(                                                    \
    a->x, a->interpolant,                                                      \
    b->x, b->interpolant,                                                      \
    c->x, c->interpolant);                                                     \
}

/* Both gradient areas for every interpolant: u, v, r, g and b. */
#define compute_all_gradient_areas()                                           \
  compute_gradient_area_x(u);                                                  \
  compute_gradient_area_y(u);                                                  \
  compute_gradient_area_x(v);                                                  \
  compute_gradient_area_y(v);                                                  \
  compute_gradient_area_x(r);                                                  \
  compute_gradient_area_y(r);                                                  \
  compute_gradient_area_x(g);                                                  \
  compute_gradient_area_y(g);                                                  \
  compute_gradient_area_x(b);                                                  \
  compute_gradient_area_y(b)
484
/*
 * Initialize one interpolant of `span`: the starting value is the base
 * vertex's attribute at the sample center, and the x/y steps are the
 * gradient areas multiplied by the reciprocal of the triangle area.
 * Expects `span`, `reciprocal` and `shift` in the expanding scope.
 */
#define set_interpolant_base(interpolant, base_vertex)                         \
  span->interpolant.current_value = fixed_center((base_vertex)->interpolant);  \
  span->interpolant.step_dx =                                                  \
   fixed_reciprocal_multiply(span->interpolant.gradient_area_x, reciprocal,    \
   span->triangle_winding, shift);                                             \
  span->interpolant.step_dy =                                                  \
   fixed_reciprocal_multiply(span->interpolant.gradient_area_y, reciprocal,    \
   span->triangle_winding, shift)

/*
 * Initialize all five interpolants from base_vertex and record base_x as
 * the integer part of the left edge. The reciprocal's shift is reduced by
 * FIXED_BITS so the steps come out with FIXED_BITS fractional bits.
 */
#define set_interpolant_bases(base_vertex)                                     \
{                                                                              \
  u32 shift;                                                                   \
  u32 reciprocal = fixed_reciprocal(span->triangle_area, &shift);              \
                                                                               \
  shift -= FIXED_BITS;                                                         \
  span->base_x = span->left_x >> EDGE_STEP_BITS;                               \
                                                                               \
  set_interpolant_base(u, base_vertex);                                        \
  set_interpolant_base(v, base_vertex);                                        \
  set_interpolant_base(r, base_vertex);                                        \
  set_interpolant_base(g, base_vertex);                                        \
  set_interpolant_base(b, base_vertex);                                        \
}
506
/*
 * Set up one span edge (span->{edge}_x and span->{edge}_dx_dy) running from
 * vertex `start` to vertex `end` over `height` scanlines.
 *
 * The division by height is done with an integer reciprocal: height is
 * normalized so its top bit is set, 2^50 is divided by it (rounding up),
 * and the final left shift brings the result to EDGE_STEP_BITS fractional
 * bits. The starting x is biased by (height - 1) / height.
 *
 * height must be non-zero (__builtin_clz(0) is undefined). Expects `span`
 * in the expanding scope.
 */
#define compute_edge_delta(edge, start, end, height)                           \
{                                                                              \
  s32 edge_x0 = (start)->x;                                                    \
  s32 edge_x1 = (end)->x;                                                      \
  s32 edge_width = edge_x1 - edge_x0;                                          \
                                                                               \
  s32 edge_shift = __builtin_clz(height);                                      \
  u32 edge_height_norm = height << edge_shift;                                 \
  u32 edge_height_recip =                                                      \
   ((1ULL << 50) + (edge_height_norm - 1)) / edge_height_norm;                 \
                                                                               \
  edge_shift -= (50 - EDGE_STEP_BITS);                                         \
                                                                               \
  span->edge##_dx_dy = ((s64)edge_width * edge_height_recip) << edge_shift;    \
  span->edge##_x =                                                             \
   ((((s64)edge_x0 * height) + (height - 1)) * edge_height_recip)              \
   << edge_shift;                                                              \
}
524
525
/*
 * Render `height` scanlines moving towards smaller y: the span is stepped
 * first, then drawn at current_y, which is then decremented. `height` must
 * be a non-zero lvalue; it is counted down to zero. Expects `psx_gpu`,
 * `span`, `current_y` and `flags` in the expanding scope.
 */
#define render_spans_up(height)                                                \
  do                                                                           \
  {                                                                            \
    decrement_span(span);                                                      \
    render_span(psx_gpu, span, current_y--, flags);                            \
  } while(--height)

/*
 * Render `height` scanlines moving towards larger y: the span is drawn at
 * current_y, then stepped, and current_y is incremented. `height` must be a
 * non-zero lvalue; it is counted down to zero. Expects `psx_gpu`, `span`,
 * `current_y` and `flags` in the expanding scope.
 */
#define render_spans_down(height)                                              \
  do                                                                           \
  {                                                                            \
    render_span(psx_gpu, span, current_y++, flags);                            \
    increment_span(span);                                                      \
  } while(--height)
543
544 #define render_spans_up_up(minor, major)                                       \
545   s32 current_y = bottom->y - 1;                                               \
546   s32 height_minor_a = bottom->y - middle->y;                                  \
547   s32 height_minor_b = middle->y - top->y;                                     \
548   s32 height_major = height_minor_a + height_minor_b;                          \
549                                                                                \
550   compute_edge_delta(major, bottom, top, height_major);                        \
551   compute_edge_delta(minor, bottom, middle, height_minor_a);                   \
552   set_interpolant_bases(bottom);                                               \
553                                                                                \
554   render_spans_up(height_minor_a);                                             \
555                                                                                \
556   compute_edge_delta(minor, middle, top, height_minor_b);                      \
557   render_spans_up(height_minor_b)                                              \
558
559 void render_spans_up_left(psx_gpu_struct *psx_gpu, _span_struct *span,
560  vertex_struct *bottom, vertex_struct *middle, vertex_struct *top, u32 flags)
561 {
562   render_spans_up_up(left, right);
563 }
564
565 void render_spans_up_right(psx_gpu_struct *psx_gpu, _span_struct *span,
566  vertex_struct *bottom, vertex_struct *middle, vertex_struct *top, u32 flags)
567 {
568   render_spans_up_up(right, left);
569 }
570
571 #define render_spans_down_down(minor, major)                                   \
572   s32 current_y = top->y;                                                      \
573   s32 height_minor_a = middle->y - top->y;                                     \
574   s32 height_minor_b = bottom->y - middle->y;                                  \
575   s32 height_major = height_minor_a + height_minor_b;                          \
576                                                                                \
577   compute_edge_delta(minor, top, middle, height_minor_a);                      \
578   compute_edge_delta(major, top, bottom, height_major);                        \
579   set_interpolant_bases(top);                                                  \
580                                                                                \
581   render_spans_down(height_minor_a);                                           \
582                                                                                \
583   compute_edge_delta(minor, middle, bottom, height_minor_b);                   \
584   render_spans_down(height_minor_b)                                            \
585
586 void render_spans_down_left(psx_gpu_struct *psx_gpu, _span_struct *span,
587  vertex_struct *top, vertex_struct *middle, vertex_struct *bottom, u32 flags)
588 {
589   render_spans_down_down(left, right);
590 }
591
592 void render_spans_down_right(psx_gpu_struct *psx_gpu, _span_struct *span,
593  vertex_struct *top, vertex_struct *middle, vertex_struct *bottom, u32 flags)
594 {
595   render_spans_down_down(right, left);
596 }
597
598 #define render_spans_up_flat(bottom_left, bottom_right, top_left, top_right)   \
599   s32 current_y = bottom_left->y - 1;                                          \
600   s32 height = bottom_left->y - top_left->y;                                   \
601                                                                                \
602   compute_edge_delta(left, bottom_left, top_left, height);                     \
603   compute_edge_delta(right, bottom_right, top_right, height);                  \
604   set_interpolant_bases(bottom_left);                                          \
605   render_spans_up(height)                                                      \
606
607 void render_spans_up_a(psx_gpu_struct *psx_gpu, _span_struct *span,
608  vertex_struct *bottom_left, vertex_struct *bottom_right, vertex_struct *top,
609  u32 flags)
610 {
611   render_spans_up_flat(bottom_left, bottom_right, top, top);
612 }
613
614 void render_spans_up_b(psx_gpu_struct *psx_gpu, _span_struct *span,
615  vertex_struct *bottom, vertex_struct *top_left, vertex_struct *top_right,
616  u32 flags)
617 {
618   render_spans_up_flat(bottom, bottom, top_left, top_right);
619 }
620
621 #define render_spans_down_flat(top_left, top_right, bottom_left, bottom_right) \
622   s32 current_y = top_left->y;                                                 \
623   s32 height = bottom_left->y - top_left->y;                                   \
624                                                                                \
625   compute_edge_delta(left, top_left, bottom_left, height);                     \
626   compute_edge_delta(right, top_right, bottom_right, height);                  \
627   set_interpolant_bases(top_left);                                             \
628   render_spans_down(height)                                                    \
629
630 void render_spans_down_a(psx_gpu_struct *psx_gpu, _span_struct *span,
631  vertex_struct *top_left, vertex_struct *top_right, vertex_struct *bottom,
632  u32 flags)
633 {
634   render_spans_down_flat(top_left, top_right, bottom, bottom);
635 }
636
637 void render_spans_down_b(psx_gpu_struct *psx_gpu, _span_struct *span,
638  vertex_struct *top, vertex_struct *bottom_left, vertex_struct *bottom_right,
639  u32 flags)
640 {
641   render_spans_down_flat(top, top, bottom_left, bottom_right);
642 }
643
644 void render_spans_up_down(psx_gpu_struct *psx_gpu, _span_struct *span,
645  vertex_struct *middle, vertex_struct *top, vertex_struct *bottom, u32 flags)
646 {
647   s32 middle_y = middle->y;
648   s32 current_y = middle_y - 1;
649   s32 height_minor_a = middle->y - top->y;
650   s32 height_minor_b = bottom->y - middle->y;
651   s32 height_major = height_minor_a + height_minor_b;
652
653   u64 right_x_mid;
654
655   compute_edge_delta(left, middle, top, height_minor_a);
656   compute_edge_delta(right, bottom, top, height_major);
657   set_interpolant_bases(middle);
658
659   right_x_mid = span->right_x + (span->right_dx_dy * height_minor_b);
660   span->right_x = right_x_mid;
661
662   render_spans_up(height_minor_a);  
663
664   compute_edge_delta(left, middle, bottom, height_minor_b);
665   set_interpolant_bases(middle);
666
667   span->right_dx_dy *= -1;
668   span->right_x = right_x_mid;
669   current_y = middle_y;
670
671   render_spans_down(height_minor_b);
672 }
673
/*
 * Swap two vertex pointers and toggle the local `triangle_winding` flag,
 * which must exist in the expanding scope.
 */
#define vertex_swap(_a, _b)                                                    \
{                                                                              \
  vertex_struct *swapped_vertex = _b;                                          \
  _b = _a;                                                                     \
  _a = swapped_vertex;                                                         \
  triangle_winding ^= 1;                                                       \
}

/* Direction codes of a y delta: negative, zero, positive. */
#define triangle_y_direction_up   1
#define triangle_y_direction_flat 2
#define triangle_y_direction_down 0

#define triangle_winding_positive 0
#define triangle_winding_negative 1

/*
 * Declare `direction_variable` holding the direction code of `value`:
 * flat for zero, otherwise the sign bit (1 = up for negative, 0 = down).
 */
#define triangle_set_direction(direction_variable, value)                      \
  u32 direction_variable = ((value) == 0) ?                                    \
   triangle_y_direction_flat : ((u32)(value) >> 31)

/*
 * Case label for one combination of three edge directions (2 bits each)
 * and the winding (bit 6), matching the switch key in render_triangle.
 */
#define triangle_case(direction_a, direction_b, direction_c, winding)          \
  case ((triangle_winding_##winding << 6) |                                    \
   (triangle_y_direction_##direction_c << 4) |                                 \
   (triangle_y_direction_##direction_b << 2) |                                 \
   triangle_y_direction_##direction_a)
700
701
702 void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
703  u32 flags)
704 {
705   s32 triangle_area;
706   u32 triangle_winding = 0;
707   _span_struct span;
708
709   vertex_struct *a = &(vertexes[0]);
710   vertex_struct *b = &(vertexes[1]);
711   vertex_struct *c = &(vertexes[2]);
712
713   triangle_area = triangle_signed_area_x2(a->x, a->y, b->x, b->y, c->x, c->y);
714
715   triangles++;
716
717   if(triangle_area == 0)
718     return;
719
720   if(b->y < a->y)
721     vertex_swap(a, b);
722
723   if(c->y < b->y)
724   {
725     vertex_swap(b, c);
726
727     if(b->y < a->y)
728       vertex_swap(a, b);
729   }
730
731   if((c->y - a->y) >= 512)
732     return;
733
734   if(triangle_area < 0)
735   {
736     triangle_area = -triangle_area;
737     triangle_winding ^= 1;
738     vertex_swap(a, c);
739   }
740
741   if(b->x < a->x)
742     vertex_swap(a, b);
743
744   if(c->x < b->x) 
745   {
746     vertex_swap(b, c);
747
748     if(b->x < a->x)
749       vertex_swap(a, b);
750   }
751
752   if((c->x - a->x) >= 1024)
753     return;
754
755   s32 y_delta_a = b->y - a->y;
756   s32 y_delta_b = c->y - b->y;
757   s32 y_delta_c = c->y - a->y;
758
759   triangle_set_direction(y_direction_a, y_delta_a);
760   triangle_set_direction(y_direction_b, y_delta_b);
761   triangle_set_direction(y_direction_c, y_delta_c);
762
763   compute_all_gradient_areas();
764   span.triangle_area = triangle_area;
765   span.triangle_winding = triangle_winding;
766
767   switch(y_direction_a | (y_direction_b << 2) | (y_direction_c << 4) |
768    (triangle_winding << 6))
769   {
770     triangle_case(up, up, up, negative):
771     triangle_case(up, up, flat, negative):
772     triangle_case(up, up, down, negative):
773       render_spans_up_right(psx_gpu, &span, a, b, c, flags);
774       break;
775
776     triangle_case(flat, up, up, negative):
777     triangle_case(flat, up, flat, negative):
778     triangle_case(flat, up, down, negative):
779       render_spans_up_a(psx_gpu, &span, a, b, c, flags);
780       break;
781
782     triangle_case(down, up, up, negative):
783       render_spans_up_down(psx_gpu, &span, a, c, b, flags);
784       break;
785
786     triangle_case(down, up, flat, negative):
787       render_spans_down_a(psx_gpu, &span, a, c, b, flags);
788       break;
789
790     triangle_case(down, up, down, negative):
791       render_spans_down_right(psx_gpu, &span, a, c, b, flags);
792       break;
793
794     triangle_case(down, flat, up, negative):
795     triangle_case(down, flat, flat, negative):
796     triangle_case(down, flat, down, negative):
797       render_spans_down_b(psx_gpu, &span, a, b, c, flags);
798       break;
799
800     triangle_case(down, down, up, negative):
801     triangle_case(down, down, flat, negative):
802     triangle_case(down, down, down, negative):
803       render_spans_down_left(psx_gpu, &span, a, b, c, flags);
804       break;
805
806     triangle_case(up, up, up, positive):
807     triangle_case(up, up, flat, positive):
808     triangle_case(up, up, down, positive):
809       render_spans_up_left(psx_gpu, &span, a, b, c, flags);
810       break;
811
812     triangle_case(up, flat, up, positive):
813     triangle_case(up, flat, flat, positive):
814     triangle_case(up, flat, down, positive):
815       render_spans_up_b(psx_gpu, &span, a, b, c, flags);
816       break;
817
818     triangle_case(up, down, up, positive):
819       render_spans_up_right(psx_gpu, &span, a, c, b, flags);
820       break;
821
822     triangle_case(up, down, flat, positive):
823       render_spans_up_a(psx_gpu, &span, a, c, b, flags);
824       break;
825
826     triangle_case(up, down, down, positive):
827       render_spans_up_down(psx_gpu, &span, a, b, c, flags);
828       break;
829
830     triangle_case(flat, down, up, positive):
831     triangle_case(flat, down, flat, positive):
832     triangle_case(flat, down, down, positive):
833       render_spans_down_a(psx_gpu, &span, a, b, c, flags);
834       break;
835
836     triangle_case(down, down, up, positive):
837     triangle_case(down, down, flat, positive):
838     triangle_case(down, down, down, positive):
839       render_spans_down_right(psx_gpu, &span, a, b, c, flags);
840       break;
841   }
842   
843 }
844
845
846 void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v,
847  s32 width, s32 height, u32 flags)
848 {
849   // TODO: Flip/mirror
850   s32 current_x, current_y;
851   u32 current_u, current_v;
852   u32 primitive_color = psx_gpu->primitive_color;
853   u32 sprite_r, sprite_g, sprite_b;
854   s32 color_r = 0;
855   s32 color_g = 0; 
856   s32 color_b = 0;
857   u32 texel = 0;
858
859   sprite_r = primitive_color & 0xFF;
860   sprite_g = (primitive_color >> 8) & 0xFF;
861   sprite_b = (primitive_color >> 16) & 0xFF;
862
863   static u32 sprites = 0;
864
865   sprites++;
866
867   for(current_y = y, current_v = v; 
868    current_y < y + height; current_y++, current_v++)
869   {
870     for(current_x = x, current_u = u;
871      current_x < x + width; current_x++, current_u++)
872     {
873       if((current_x >= psx_gpu->viewport_start_x) &&
874        (current_y >= psx_gpu->viewport_start_y) &&
875        (current_x <= psx_gpu->viewport_end_x) &&
876        (current_y <= psx_gpu->viewport_end_y))
877       { 
878         if(psx_gpu->mask_evaluate &&
879          (psx_gpu->vram[(y * 1024) + current_x] & 0x8000))
880         {
881           continue;
882         }
883
884         if(flags & RENDER_FLAGS_TEXTURE_MAP)
885         {
886           texel = fetch_texel(psx_gpu, current_u, current_v);
887           if(texel == 0)
888             continue;
889
890           color_r = texel & 0x1F;
891           color_g = (texel >> 5) & 0x1F;
892           color_b = (texel >> 10) & 0x1F;
893
894           if((flags & RENDER_FLAGS_MODULATE_TEXELS) == 0)
895           {
896             color_r *= sprite_r;
897             color_g *= sprite_g;
898             color_b *= sprite_b;
899
900             color_r >>= 7;
901             color_g >>= 7;
902             color_b >>= 7;
903           }
904         }
905         else
906         {
907           color_r = sprite_r >> 3;
908           color_g = sprite_g >> 3;
909           color_b = sprite_b >> 3;
910         }
911
912         draw_pixel(psx_gpu, color_r, color_g, color_b, texel, current_x,
913          current_y, flags);
914       }
915     }
916   }
917 }
918
919
/*
 * Plots one pixel of a line at (_x, _y) and steps the color interpolators.
 *
 * Expands inside render_line and uses names from the expansion site: psx_gpu,
 * flags, primitive_color, color_r/g/b, current_r/g/b and gradient_r/g/b, plus
 * dither_table and span_pixels.
 *
 * The pixel is drawn only when it lies inside the inclusive viewport
 * rectangle. For shaded lines the color comes from the fixed point
 * interpolators, otherwise from the 8-bit channels of primitive_color. With
 * dither_mode set, the dither_table entry for (_y % 4, _x % 4) is added to
 * every channel and the result is clamped to 0..255, so the shift down to
 * 5 bits cannot leave the 0..31 range.
 *
 * The interpolators advance for every pixel of a shaded line, clipped or not,
 * so the visible part of a partially clipped line keeps the colors it would
 * have had unclipped.
 */
#define draw_pixel_line(_x, _y)                                                \
do                                                                             \
{                                                                              \
  if(((_x) >= psx_gpu->viewport_start_x) &&                                    \
   ((_y) >= psx_gpu->viewport_start_y) &&                                      \
   ((_x) <= psx_gpu->viewport_end_x) &&                                        \
   ((_y) <= psx_gpu->viewport_end_y))                                          \
  {                                                                            \
    if(flags & RENDER_FLAGS_SHADE)                                             \
    {                                                                          \
      color_r = fixed_to_int(current_r);                                       \
      color_g = fixed_to_int(current_g);                                       \
      color_b = fixed_to_int(current_b);                                       \
    }                                                                          \
    else                                                                       \
    {                                                                          \
      color_r = primitive_color & 0xFF;                                        \
      color_g = (primitive_color >> 8) & 0xFF;                                 \
      color_b = (primitive_color >> 16) & 0xFF;                                \
    }                                                                          \
                                                                               \
    if(psx_gpu->dither_mode)                                                   \
    {                                                                          \
      s32 dither_offset = dither_table[(_y) % 4][(_x) % 4];                    \
                                                                               \
      color_r += dither_offset;                                                \
      color_g += dither_offset;                                                \
      color_b += dither_offset;                                                \
                                                                               \
      if(color_r < 0)                                                          \
        color_r = 0;                                                           \
      else if(color_r > 0xFF)                                                  \
        color_r = 0xFF;                                                        \
                                                                               \
      if(color_g < 0)                                                          \
        color_g = 0;                                                           \
      else if(color_g > 0xFF)                                                  \
        color_g = 0xFF;                                                        \
                                                                               \
      if(color_b < 0)                                                          \
        color_b = 0;                                                           \
      else if(color_b > 0xFF)                                                  \
        color_b = 0xFF;                                                        \
    }                                                                          \
                                                                               \
    /* 8 bits per channel down to 5 bits per channel. */                       \
    color_r >>= 3;                                                             \
    color_g >>= 3;                                                             \
    color_b >>= 3;                                                             \
                                                                               \
    span_pixels++;                                                             \
                                                                               \
    draw_pixel(psx_gpu, color_r, color_g, color_b, 0, (_x), (_y), flags);      \
  }                                                                            \
                                                                               \
  if(flags & RENDER_FLAGS_SHADE)                                               \
  {                                                                            \
    current_r += gradient_r;                                                   \
    current_g += gradient_g;                                                   \
    current_b += gradient_b;                                                   \
  }                                                                            \
} while(0)
966
/*
 * Step helpers selected by token pasting (update_##direction) in the line
 * span macros: move a coordinate one pixel forwards or backwards. The
 * argument is parenthesized so that any lvalue expression, e.g. *ptr, is
 * stepped as a whole.
 */
#define update_increment(value)                                                \
  ((value)++)

#define update_decrement(value)                                                \
  ((value)--)
972
/*
 * Loop-bound tests selected by token pasting (compare_##direction) in the
 * vertical line span macro: true while coordinate a has not passed the
 * inclusive end coordinate b when stepping upwards (increment) or downwards
 * (decrement). Both arguments are parenthesized so that compound expressions
 * compare as a whole.
 */
#define compare_increment(a, b)                                                \
  ((a) <= (b))

#define compare_decrement(a, b)                                                \
  ((a) >= (b))
978
/*
 * Initializes the per-pixel color interpolation of a line.
 *
 * minor selects the divisor: set_line_gradients(x) divides by delta_x and
 * set_line_gradients(y) by delta_y. The gradients are the fixed point color
 * differences between vertex_b and vertex_a divided by that count, and
 * current_r/g/b start at the fixed point center of vertex_a's color.
 *
 * A zero-length line has a divisor of 0; in that case the gradients are set
 * to 0 instead of dividing, leaving a constant color.
 *
 * Uses delta_x/delta_y, vertex_a, vertex_b, gradient_r/g/b and current_r/g/b
 * from the expansion site.
 */
#define set_line_gradients(minor)                                              \
do                                                                             \
{                                                                              \
  s32 gradient_divisor = delta_##minor;                                        \
                                                                               \
  if(gradient_divisor != 0)                                                    \
  {                                                                            \
    gradient_r = int_to_fixed(vertex_b->r - vertex_a->r) / gradient_divisor;   \
    gradient_g = int_to_fixed(vertex_b->g - vertex_a->g) / gradient_divisor;   \
    gradient_b = int_to_fixed(vertex_b->b - vertex_a->b) / gradient_divisor;   \
  }                                                                            \
  else                                                                         \
  {                                                                            \
    gradient_r = 0;                                                            \
    gradient_g = 0;                                                            \
    gradient_b = 0;                                                            \
  }                                                                            \
                                                                               \
  current_r = fixed_center(vertex_a->r);                                       \
  current_g = fixed_center(vertex_a->g);                                       \
  current_b = fixed_center(vertex_a->b);                                       \
} while(0)
989
/*
 * Draws a line by stepping one pixel along X per iteration, from x_a to x_b
 * inclusive, starting at row y_a.
 *
 * An error term starting at delta_x grows by 2 * delta_y per pixel. Whenever
 * it reaches 2 * delta_x, the row is moved one step with update_<direction>
 * (increment or decrement) and the error term is reduced by 2 * delta_x.
 *
 * Color interpolation is set up per X step via set_line_gradients(x). All
 * working variables (error, error_step, error_wrap, current_x, current_y,
 * x_a, x_b, y_a, delta_x, delta_y) come from the expansion site.
 */
#define draw_line_span_horizontal(direction)                                   \
do                                                                             \
{                                                                              \
  error_wrap = delta_x * 2;                                                    \
  error_step = delta_y * 2;                                                    \
  /* Start halfway to the wrap threshold. */                                   \
  error = delta_x;                                                             \
                                                                               \
  set_line_gradients(x);                                                       \
  current_y = y_a;                                                             \
                                                                               \
  for(current_x = x_a; current_x <= x_b; current_x++)                          \
  {                                                                            \
    draw_pixel_line(current_x, current_y);                                     \
                                                                               \
    error += error_step;                                                       \
    if(error >= error_wrap)                                                    \
    {                                                                          \
      error -= error_wrap;                                                     \
      update_##direction(current_y);                                           \
    }                                                                          \
  }                                                                            \
} while(0)
1012
/*
 * Draws a line by stepping one pixel along Y per iteration, from y_a to y_b
 * inclusive, starting at column x_a. direction (increment or decrement)
 * selects both the Y step (update_<direction>) and the matching loop bound
 * test (compare_<direction>).
 *
 * An error term starting at delta_y grows by 2 * delta_x per pixel. Whenever
 * it exceeds 2 * delta_y (strictly greater), the column advances by one and
 * the error term is reduced by 2 * delta_y.
 *
 * Color interpolation is set up per Y step via set_line_gradients(y). All
 * working variables come from the expansion site.
 */
#define draw_line_span_vertical(direction)                                     \
do                                                                             \
{                                                                              \
  error_wrap = delta_y * 2;                                                    \
  error_step = delta_x * 2;                                                    \
  /* Start halfway to the wrap threshold. */                                   \
  error = delta_y;                                                             \
                                                                               \
  set_line_gradients(y);                                                       \
  current_x = x_a;                                                             \
                                                                               \
  for(current_y = y_a; compare_##direction(current_y, y_b);                    \
   update_##direction(current_y))                                              \
  {                                                                            \
    draw_pixel_line(current_x, current_y);                                     \
                                                                               \
    error += error_step;                                                       \
    if(error > error_wrap)                                                     \
    {                                                                          \
      error -= error_wrap;                                                     \
      current_x++;                                                             \
    }                                                                          \
  }                                                                            \
} while(0)
1036                                                                                
1037 void render_line(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 flags)
1038 {
1039   u32 primitive_color = psx_gpu->primitive_color;
1040   s32 color_r, color_g, color_b;
1041
1042   fixed_type gradient_r = 0;
1043   fixed_type gradient_g = 0;
1044   fixed_type gradient_b = 0;
1045   fixed_type current_r = 0;
1046   fixed_type current_g = 0;
1047   fixed_type current_b = 0;
1048
1049   s32 y_a, y_b;
1050   s32 x_a, x_b;
1051
1052   s32 delta_x, delta_y;
1053   u32 triangle_winding = 0;
1054
1055   s32 current_x;
1056   s32 current_y;
1057
1058   u32 error_step;
1059   u32 error;
1060   u32 error_wrap;
1061
1062   vertex_struct *vertex_a = &(vertexes[0]);
1063   vertex_struct *vertex_b = &(vertexes[1]);
1064
1065   if(vertex_a->x >= vertex_b->x)
1066   {
1067     vertex_swap(vertex_a, vertex_b);
1068   }
1069
1070   x_a = vertex_a->x;
1071   x_b = vertex_b->x;
1072
1073   y_a = vertex_a->y;
1074   y_b = vertex_b->y;
1075
1076   delta_x = x_b - x_a;
1077   delta_y = y_b - y_a;
1078
1079   if(delta_x >= 1024)
1080     return;
1081
1082   flags &= ~RENDER_FLAGS_TEXTURE_MAP;
1083
1084   if(delta_y < 0)
1085   {
1086     delta_y *= -1;
1087
1088     if(delta_y >= 512)
1089       return;
1090
1091     if(delta_x > delta_y)
1092       draw_line_span_horizontal(decrement);
1093     else
1094       draw_line_span_vertical(decrement);
1095   }
1096   else
1097   {
1098     if(delta_y >= 512)
1099       return;
1100
1101     if(delta_x > delta_y)
1102       draw_line_span_horizontal(increment);
1103     else
1104       draw_line_span_vertical(increment);
1105   }
1106 }
1107
1108
1109 void render_block_fill(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y,
1110  u32 width, u32 height)
1111 {
1112   u32 r = color & 0xFF;
1113   u32 g = (color >> 8) & 0xFF;
1114   u32 b = (color >> 16) & 0xFF;
1115   u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10);
1116
1117   u16 *vram_ptr = psx_gpu->vram + x + (y * 1024);
1118   u32 draw_x, draw_y;
1119
1120   for(draw_y = 0; draw_y < height; draw_y++)
1121   {
1122     for(draw_x = 0; draw_x < width; draw_x++)
1123     {
1124       vram_ptr[draw_x] = color_16bpp;
1125     }
1126
1127     vram_ptr += 1024;
1128   }
1129 }
1130
1131 void render_block_copy(psx_gpu_struct *psx_gpu, u16 *source, u32 x, u32 y,
1132  u32 width, u32 height, u32 pitch)
1133 {
1134   u16 *vram_ptr = psx_gpu->vram + x + (y * 1024);
1135   u32 draw_x, draw_y;
1136
1137   for(draw_y = 0; draw_y < height; draw_y++)
1138   {
1139     for(draw_x = 0; draw_x < width; draw_x++)
1140     {
1141       vram_ptr[draw_x] = source[draw_x];
1142     }
1143
1144     source += pitch;
1145     vram_ptr += 1024;
1146   }
1147 }
1148
1149 void render_block_move(psx_gpu_struct *psx_gpu, u32 source_x, u32 source_y,
1150  u32 dest_x, u32 dest_y, u32 width, u32 height)
1151 {
1152   render_block_copy(psx_gpu, psx_gpu->vram + source_x + (source_y * 1024),
1153    dest_x, dest_y, width, height, 1024);
1154 }
1155
1156 void initialize_psx_gpu(psx_gpu_struct *psx_gpu)
1157 {
1158   psx_gpu->pixel_count_mode = 0;
1159   psx_gpu->pixel_compare_mode = 0;
1160
1161   psx_gpu->vram_pixel_counts_a = malloc(sizeof(u8) * 1024 * 512);
1162   psx_gpu->vram_pixel_counts_b = malloc(sizeof(u8) * 1024 * 512);
1163   memset(psx_gpu->vram_pixel_counts_a, 0, sizeof(u8) * 1024 * 512);
1164   memset(psx_gpu->vram_pixel_counts_b, 0, sizeof(u8) * 1024 * 512);
1165   psx_gpu->compare_vram = malloc(sizeof(u16) * 1024 * 512);
1166 }