psx_gpu: add enhanced triangle rendering
author notaz <notasas@gmail.com>
Sun, 12 Aug 2012 21:03:43 +0000 (00:03 +0300)
committer notaz <notasas@gmail.com>
Thu, 11 Oct 2012 21:05:07 +0000 (00:05 +0300)
plugins/gpu_neon/psx_gpu/common.h
plugins/gpu_neon/psx_gpu/psx_gpu.c
plugins/gpu_neon/psx_gpu/psx_gpu.h
plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h
plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c
plugins/gpu_neon/psx_gpu/psx_gpu_parse.c
plugins/gpu_neon/psx_gpu_if.c

index f299f79..d5cf3e9 100644 (file)
@@ -18,5 +18,7 @@ typedef unsigned long long int u64;
 #include "vector_ops.h"
 #include "psx_gpu.h"
 
+#define unlikely(x) __builtin_expect((x), 0) /* optimizer hint: (x) is expected to evaluate to 0; the value of (x) is returned unchanged */
+
 #endif
 
index 98aacc3..0c1c78d 100644 (file)
@@ -47,7 +47,8 @@ u32 zero_block_spans = 0;
 u32 texture_cache_loads = 0;
 u32 false_modulated_blocks = 0;
 
-u32 reciprocal_table[512];
+/* double size for enhancement */
+u32 reciprocal_table[512 * 2];
 
 
 typedef s32 fixed_type;
@@ -1872,7 +1873,7 @@ void setup_blocks_##shading##_##texturing##_##dithering##_##sw##_##target(     \
     if(span_num_blocks)                                                        \
     {                                                                          \
       y = span_edge_data->y;                                                   \
-      fb_ptr = psx_gpu->vram_ptr + span_edge_data->left_x + (y * 1024);        \
+      fb_ptr = psx_gpu->vram_out_ptr + span_edge_data->left_x + (y * 1024);    \
                                                                                \
       setup_blocks_span_initialize_##shading##_##texturing();                  \
       setup_blocks_span_initialize_##dithering(texturing);                     \
@@ -2905,8 +2906,8 @@ char *render_block_flag_strings[] =
    (triangle_y_direction_##direction_c << 4) |                                 \
    (triangle_winding_##winding << 6))                                          \
 
-void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
- u32 flags)
+static int prepare_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
+ vertex_struct *vertexes_out[3])
 {
   s32 y_top, y_bottom;
   s32 triangle_area;
@@ -2927,7 +2928,7 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
 #ifdef PROFILE
     trivial_rejects++;
 #endif
-    return;
+    return 0;
   }
 
   if(b->y < a->y)
@@ -2949,7 +2950,7 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
 #ifdef PROFILE
     trivial_rejects++;
 #endif
-    return;
+    return 0;
   }
 
   if(triangle_area < 0)
@@ -2975,7 +2976,7 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
 #ifdef PROFILE
     trivial_rejects++;
 #endif
-    return;
+    return 0;
   }
 
   if(invalidate_texture_cache_region_viewport(psx_gpu, a->x, y_top, c->x,
@@ -2984,13 +2985,28 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
 #ifdef PROFILE
     trivial_rejects++;
 #endif
-    return;
+    return 0;
   }
 
-  psx_gpu->num_spans = 0;
   psx_gpu->triangle_area = triangle_area;
   psx_gpu->triangle_winding = triangle_winding;
 
+  vertexes_out[0] = a;
+  vertexes_out[1] = b;
+  vertexes_out[2] = c;
+
+  return 1;
+}
+
+static void render_triangle_p(psx_gpu_struct *psx_gpu,
+ vertex_struct *vertex_ptrs[3], u32 flags)
+{
+  psx_gpu->num_spans = 0;
+
+  vertex_struct *a = vertex_ptrs[0];
+  vertex_struct *b = vertex_ptrs[1];
+  vertex_struct *c = vertex_ptrs[2];
+
   s32 y_delta_a = b->y - a->y;
   s32 y_delta_b = c->y - b->y;
   s32 y_delta_c = c->y - a->y;
@@ -3002,7 +3018,7 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
   compute_all_gradients(psx_gpu, a, b, c);
 
   switch(y_direction_a | (y_direction_b << 2) | (y_direction_c << 4) |
-   (triangle_winding << 6))
+   (psx_gpu->triangle_winding << 6))
   {
     triangle_case(up, up, up, negative):
     triangle_case(up, up, flat, negative):
@@ -3126,6 +3142,14 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
    (psx_gpu);
 }
 
+void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, /* Original entry point, now a wrapper: */
+ u32 flags) /* prepare_triangle() validates, render_triangle_p() draws. */
+{
+  vertex_struct *vertex_ptrs[3]; /* written by prepare_triangle() only when it returns 1 */
+  if (prepare_triangle(psx_gpu, vertexes, vertex_ptrs)) /* 0 means the triangle was rejected; draw nothing */
+    render_triangle_p(psx_gpu, vertex_ptrs, flags);
+}
+
 
 void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu);
 
@@ -4245,7 +4269,7 @@ void render_line(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 flags,
 
   flags &= ~RENDER_FLAGS_TEXTURE_MAP;
 
-  vram_ptr = psx_gpu->vram_ptr + (y_a * 1024) + x_a;
+  vram_ptr = psx_gpu->vram_out_ptr + (y_a * 1024) + x_a;
 
   control_mask = 0x0;
 
@@ -4435,7 +4459,6 @@ void render_block_fill(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y,
   if((width == 0) || (height == 0))
     return;
 
-  flush_render_block_buffer(psx_gpu);
   invalidate_texture_cache_region(psx_gpu, x, y, x + width - 1, y + height - 1);
 
   u32 r = color & 0xFF;
@@ -4445,7 +4468,7 @@ void render_block_fill(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y,
    psx_gpu->mask_msb;
   u32 color_32bpp = color_16bpp | (color_16bpp << 16);
 
-  u32 *vram_ptr = (u32 *)(psx_gpu->vram_ptr + x + (y * 1024));
+  u32 *vram_ptr = (u32 *)(psx_gpu->vram_out_ptr + x + (y * 1024));
 
   u32 pitch = 512 - (width / 2);
   u32 num_width;
@@ -4522,7 +4545,8 @@ void initialize_reciprocal_table(void)
   u32 height_reciprocal;
   s32 shift;
 
-  for(height = 1; height < 512; height++)
+  for(height = 1; height < sizeof(reciprocal_table)
+       / sizeof(reciprocal_table[0]); height++)
   {
     shift = __builtin_clz(height);
     height_normalized = height << shift;
@@ -4561,6 +4585,7 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram)
   psx_gpu->num_blocks = 0;
 
   psx_gpu->vram_ptr = vram;
+  psx_gpu->vram_out_ptr = vram;
 
   psx_gpu->texture_page_base = psx_gpu->vram_ptr;
   psx_gpu->texture_page_ptr = psx_gpu->vram_ptr;
index 53a8717..7ed5622 100644 (file)
@@ -122,7 +122,7 @@ typedef struct
   vec_4x32u g_block_span;
   vec_4x32u b_block_span;
 
-  // 72 bytes
+  // 76 bytes
   u32 b;
   u32 b_dy;
 
@@ -143,6 +143,7 @@ typedef struct
   void *texture_page_base;
   u16 *clut_ptr;
   u16 *vram_ptr;
+  u16 *vram_out_ptr;
 
   // 26 bytes
   u16 render_state_base;
@@ -180,9 +181,16 @@ typedef struct
   u8 primitive_type;
   u8 interlace_mode;
 
+  // enhancement stuff
+  u16 *enhancement_buf_ptr;
+  s16 saved_viewport_start_x;
+  s16 saved_viewport_start_y;
+  s16 saved_viewport_end_x;
+  s16 saved_viewport_end_y;
+
   // Align up to 64 byte boundary to keep the upcoming buffers cache line
-  // aligned
-  //u8 reserved_a[0];
+  // aligned, also make reachable with single immediate addition
+  u8 reserved_a[240];
 
   // 8KB
   block_struct blocks[MAX_BLOCKS_PER_ROW];
index 79d5466..6393e15 100644 (file)
@@ -1337,7 +1337,7 @@ function(setup_blocks_shaded_textured_dithered_##swizzling##_indirect)         \
   add dither_offset_ptr, psx_gpu, #psx_gpu_dither_table_offset;                \
                                                                                \
   ldrh y, [ span_edge_data, #edge_data_y_offset ];                             \
-  ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ];                           \
+  ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ];                       \
                                                                                \
   cmp span_num_blocks, #0;                                                     \
   beq 1f;                                                                      \
@@ -1604,7 +1604,7 @@ function(setup_blocks_unshaded_textured_dithered_##swizzling##_indirect)       \
   add dither_offset_ptr, psx_gpu, #psx_gpu_dither_table_offset;                \
                                                                                \
   ldrh y, [ span_edge_data, #edge_data_y_offset ];                             \
-  ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ];                           \
+  ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ];                       \
                                                                                \
   cmp span_num_blocks, #0;                                                     \
   beq 1f;                                                                      \
@@ -1799,7 +1799,7 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_indirect)
   ldrh span_num_blocks, [ span_edge_data, #edge_data_num_blocks_offset ]
   ldrh y, [ span_edge_data, #edge_data_y_offset ]
 
-  ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ]
+  ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ]
 
   cmp span_num_blocks, #0
   beq 1f
@@ -1919,7 +1919,7 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_direct)
   ldrh span_num_blocks, [ span_edge_data, #edge_data_num_blocks_offset ]
   ldrh y, [ span_edge_data, #edge_data_y_offset ]
 
-  ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ]
+  ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ]
 
   cmp span_num_blocks, #0
   beq 1f
@@ -2106,7 +2106,7 @@ function(setup_blocks_shaded_untextured_##dithering##_unswizzled_indirect)     \
   add dither_offset_ptr, psx_gpu, #psx_gpu_dither_table_offset;                \
                                                                                \
   ldrh y, [ span_edge_data, #edge_data_y_offset ];                             \
-  ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ];                           \
+  ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ];                       \
                                                                                \
   cmp span_num_blocks, #0;                                                     \
   beq 1f;                                                                      \
@@ -2346,7 +2346,7 @@ function(setup_blocks_shaded_untextured_##dithering##_unswizzled_direct)       \
   add dither_offset_ptr, psx_gpu, #psx_gpu_dither_table_offset;                \
                                                                                \
   ldrh y, [ span_edge_data, #edge_data_y_offset ];                             \
-  ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ];                           \
+  ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ];                       \
                                                                                \
   cmp span_num_blocks, #0;                                                     \
   beq 1f;                                                                      \
@@ -4804,7 +4804,7 @@ function(setup_sprite_##texture_mode)                                          \
   and offset_u, u, #0xF;                                                       \
                                                                                \
   ldr width, [ sp, #40 ];                                                      \
-  ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ];                           \
+  ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ];                       \
                                                                                \
   ldr height, [ sp, #44 ];                                                     \
   add fb_ptr, fb_ptr, y, lsl #11;                                              \
@@ -5052,7 +5052,7 @@ setup_sprites_16bpp_flush_row:
 
 function(setup_sprite_16bpp)
   stmdb sp!, { r4 - r11, r14 }
-  ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ]
+  ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ]
 
   ldr v, [ sp, #36 ]
   add fb_ptr, fb_ptr, y, lsl #11
index a47d965..2e18174 100644 (file)
 #define psx_gpu_texture_page_base_offset                  0xd0
 #define psx_gpu_clut_ptr_offset                           0xd4
 #define psx_gpu_vram_ptr_offset                           0xd8
-#define psx_gpu_render_state_base_offset                  0xdc
-#define psx_gpu_render_state_offset                       0xde
-#define psx_gpu_num_spans_offset                          0xe0
-#define psx_gpu_num_blocks_offset                         0xe2
-#define psx_gpu_offset_x_offset                           0xe4
-#define psx_gpu_offset_y_offset                           0xe6
-#define psx_gpu_clut_settings_offset                      0xe8
-#define psx_gpu_texture_settings_offset                   0xea
-#define psx_gpu_viewport_start_x_offset                   0xec
-#define psx_gpu_viewport_start_y_offset                   0xee
-#define psx_gpu_viewport_end_x_offset                     0xf0
-#define psx_gpu_viewport_end_y_offset                     0xf2
-#define psx_gpu_mask_msb_offset                           0xf4
-#define psx_gpu_triangle_winding_offset                   0xf6
-#define psx_gpu_display_area_draw_enable_offset           0xf7
-#define psx_gpu_current_texture_page_offset               0xf8
-#define psx_gpu_last_8bpp_texture_page_offset             0xf9
-#define psx_gpu_texture_mask_width_offset                 0xfa
-#define psx_gpu_texture_mask_height_offset                0xfb
-#define psx_gpu_texture_window_x_offset                   0xfc
-#define psx_gpu_texture_window_y_offset                   0xfd
-#define psx_gpu_primitive_type_offset                     0xfe
-#define psx_gpu_interlace_mode_offset                     0xff
-#define psx_gpu_blocks_offset                             0x100
-#define psx_gpu_span_uvrg_offset_offset                   0x2100
-#define psx_gpu_span_edge_data_offset                     0x4100
-#define psx_gpu_span_b_offset_offset                      0x5100
-#define psx_gpu_texture_4bpp_cache_offset                 0x5900
-#define psx_gpu_texture_8bpp_even_cache_offset            0x205900
-#define psx_gpu_texture_8bpp_odd_cache_offset             0x305900
+#define psx_gpu_vram_out_ptr_offset                       0xdc
+#define psx_gpu_render_state_base_offset                  0xe0
+#define psx_gpu_render_state_offset                       0xe2
+#define psx_gpu_num_spans_offset                          0xe4
+#define psx_gpu_num_blocks_offset                         0xe6
+#define psx_gpu_offset_x_offset                           0xe8
+#define psx_gpu_offset_y_offset                           0xea
+#define psx_gpu_clut_settings_offset                      0xec
+#define psx_gpu_texture_settings_offset                   0xee
+#define psx_gpu_viewport_start_x_offset                   0xf0
+#define psx_gpu_viewport_start_y_offset                   0xf2
+#define psx_gpu_viewport_end_x_offset                     0xf4
+#define psx_gpu_viewport_end_y_offset                     0xf6
+#define psx_gpu_mask_msb_offset                           0xf8
+#define psx_gpu_triangle_winding_offset                   0xfa
+#define psx_gpu_display_area_draw_enable_offset           0xfb
+#define psx_gpu_current_texture_page_offset               0xfc
+#define psx_gpu_last_8bpp_texture_page_offset             0xfd
+#define psx_gpu_texture_mask_width_offset                 0xfe
+#define psx_gpu_texture_mask_height_offset                0xff
+#define psx_gpu_texture_window_x_offset                   0x100
+#define psx_gpu_texture_window_y_offset                   0x101
+#define psx_gpu_primitive_type_offset                     0x102
+#define psx_gpu_interlace_mode_offset                     0x103
+#define psx_gpu_blocks_offset                             0x200
+#define psx_gpu_span_uvrg_offset_offset                   0x2200
+#define psx_gpu_span_edge_data_offset                     0x4200
+#define psx_gpu_span_b_offset_offset                      0x5200
+#define psx_gpu_texture_4bpp_cache_offset                 0x5a00
+#define psx_gpu_texture_8bpp_even_cache_offset            0x205a00
+#define psx_gpu_texture_8bpp_odd_cache_offset             0x305a00
index 2275f59..d81f8aa 100644 (file)
@@ -48,6 +48,7 @@ int main()
        WRITE_OFFSET(f, texture_page_base);
        WRITE_OFFSET(f, clut_ptr);
        WRITE_OFFSET(f, vram_ptr);
+       WRITE_OFFSET(f, vram_out_ptr);
        WRITE_OFFSET(f, render_state_base);
        WRITE_OFFSET(f, render_state);
        WRITE_OFFSET(f, num_spans);
index 920c638..26715c6 100644 (file)
@@ -152,6 +152,52 @@ void set_triangle_color(psx_gpu_struct *psx_gpu, u32 triangle_color)
   }
 }
 
+static void do_fill(psx_gpu_struct *psx_gpu, u32 x, u32 y, /* Fill a width x height rectangle at (x, y) with color, */
+ u32 width, u32 height, u32 color) /* split into up to four render_block_fill() calls when it */
+{ /* extends past x = 1024 or y = 512; the overflow is drawn starting from 0. */
+  x &= ~0xF; /* round x down to a multiple of 16 */
+  width = ((width + 0xF) & ~0xF); /* round width up to a multiple of 16 */
+
+  flush_render_block_buffer(psx_gpu); /* flushed once here, before any of the fills below */
+
+  if(unlikely((x + width) > 1024)) /* rectangle crosses the right edge */
+  {
+    u32 width_a = 1024 - x; /* columns from x up to 1024 */
+    u32 width_b = width - width_a; /* remaining columns, drawn from x = 0 */
+
+    if(unlikely((y + height) > 512)) /* also crosses the bottom edge: four pieces */
+    {
+      u32 height_a = 512 - y; /* rows from y up to 512 */
+      u32 height_b = height - height_a; /* remaining rows, drawn from y = 0 */
+
+      render_block_fill(psx_gpu, color, x, y, width_a, height_a); /* in-range part */
+      render_block_fill(psx_gpu, color, 0, y, width_b, height_a); /* horizontal overflow */
+      render_block_fill(psx_gpu, color, x, 0, width_a, height_b); /* vertical overflow */
+      render_block_fill(psx_gpu, color, 0, 0, width_b, height_b); /* overflow in both directions */
+    }
+    else
+    {
+      render_block_fill(psx_gpu, color, x, y, width_a, height); /* in-range part */
+      render_block_fill(psx_gpu, color, 0, y, width_b, height); /* horizontal overflow */
+    }
+  }
+  else
+  {
+    if(unlikely((y + height) > 512)) /* crosses only the bottom edge */
+    {
+      u32 height_a = 512 - y; /* rows from y up to 512 */
+      u32 height_b = height - height_a; /* remaining rows, drawn from y = 0 */
+
+      render_block_fill(psx_gpu, color, x, y, width, height_a); /* in-range part */
+      render_block_fill(psx_gpu, color, x, 0, width, height_b); /* vertical overflow */
+    }
+    else
+    {
+      render_block_fill(psx_gpu, color, x, y, width, height); /* common case: a single fill */
+    }
+  }
+}
+
 #define sign_extend_12bit(value)                                               \
   (((s32)((value) << 20)) >> 20)                                               \
 
@@ -235,45 +281,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command)
         u32 height = list_s16[5] & 0x1FF;
         u32 color = list[0] & 0xFFFFFF;
 
-        x &= ~0xF;
-        width = ((width + 0xF) & ~0xF);
-
-        if((x + width) > 1024)
-        {
-          u32 width_a = 1024 - x;
-          u32 width_b = width - width_a;
-
-          if((y + height) > 512)
-          {
-            u32 height_a = 512 - y;
-            u32 height_b = height - height_a;
-
-            render_block_fill(psx_gpu, color, x, y, width_a, height_a);
-            render_block_fill(psx_gpu, color, 0, y, width_b, height_a);
-            render_block_fill(psx_gpu, color, x, 0, width_a, height_b);
-            render_block_fill(psx_gpu, color, 0, 0, width_b, height_b);
-          }
-          else
-          {
-            render_block_fill(psx_gpu, color, x, y, width_a, height);
-            render_block_fill(psx_gpu, color, 0, y, width_b, height);
-          }
-        }
-        else
-        {
-          if((y + height) > 512)
-          {
-            u32 height_a = 512 - y;
-            u32 height_b = height - height_a;
-
-            render_block_fill(psx_gpu, color, x, y, width, height_a);
-            render_block_fill(psx_gpu, color, x, 0, width, height_b);
-          }
-          else
-          {
-            render_block_fill(psx_gpu, color, x, y, width, height);
-          }
-        }
+        do_fill(psx_gpu, x, y, width, height, color);
                        break;
       }
   
@@ -741,3 +749,602 @@ breakloop:
   return list - list_start;
 }
 
+#define enhancement_disable() { /* point output at vram_ptr and restore the saved_viewport_* values; uses `psx_gpu` from the caller's scope */ \
+  psx_gpu->vram_out_ptr = psx_gpu->vram_ptr; \
+  psx_gpu->viewport_start_x = psx_gpu->saved_viewport_start_x; \
+  psx_gpu->viewport_start_y = psx_gpu->saved_viewport_start_y; \
+  psx_gpu->viewport_end_x = psx_gpu->saved_viewport_end_x; \
+  psx_gpu->viewport_end_y = psx_gpu->saved_viewport_end_y; \
+} /* NOTE(review): bare { } block, not do { } while (0): `if (c) enhancement_disable(); else ...` would not compile */
+
+#define enhancement_enable() { /* point output at enhancement_buf_ptr and set the viewport to twice the saved_viewport_* values; uses `psx_gpu` from the caller's scope */ \
+  psx_gpu->vram_out_ptr = psx_gpu->enhancement_buf_ptr; \
+  psx_gpu->viewport_start_x = psx_gpu->saved_viewport_start_x * 2; \
+  psx_gpu->viewport_start_y = psx_gpu->saved_viewport_start_y * 2; \
+  psx_gpu->viewport_end_x = psx_gpu->saved_viewport_end_x * 2; \
+  psx_gpu->viewport_end_y = psx_gpu->saved_viewport_end_y * 2; \
+} /* NOTE(review): bare { } block, not do { } while (0): `if (c) enhancement_enable(); else ...` would not compile */
+
+#define shift_vertices3(v) { /* double x and y of the three vertices pointed to by v[0..2], in place; `v` is not parenthesized, so pass a plain identifier */ \
+  v[0]->x *= 2; \
+  v[0]->y *= 2; \
+  v[1]->x *= 2; \
+  v[1]->y *= 2; \
+  v[2]->x *= 2; \
+  v[2]->y *= 2; \
+}
+
+#define unshift_vertices3(v) { /* halve x and y of the three vertices pointed to by v[0..2], in place; used after shift_vertices3() to get the original coordinates back */ \
+  v[0]->x /= 2; \
+  v[0]->y /= 2; \
+  v[1]->x /= 2; \
+  v[1]->y /= 2; \
+  v[2]->x /= 2; \
+  v[2]->y /= 2; \
+}
+
+#define shift_triangle_area() /* uses `psx_gpu` from the caller's scope */ \
+  psx_gpu->triangle_area *= 4 /* x and y are each doubled by shift_vertices3(), so the area is multiplied by 2 * 2 */
+
+static int disable_main_render; /* when nonzero, do_triangle_enhanced()/do_quad_enhanced() skip their first (unscaled) render_triangle_p() pass; zero-initialized and never assigned in the code visible here */
+
+static void do_triangle_enhanced(psx_gpu_struct *psx_gpu, /* Draw one triangle with the current output settings (unless */
+ vertex_struct *vertexes, u32 current_command) /* disable_main_render is set), then again at doubled coordinates */
+{ /* into the enhancement buffer. */
+  vertex_struct *vertex_ptrs[3]; /* written by prepare_triangle() only when it returns 1 */
+
+  if (!prepare_triangle(psx_gpu, vertexes, vertex_ptrs)) /* triangle rejected: nothing to draw in either pass */
+    return;
+
+  if (!disable_main_render)
+    render_triangle_p(psx_gpu, vertex_ptrs, current_command); /* gpu_parse_enhanced() calls enhancement_disable() before each command, so this pass is unscaled there */
+
+  enhancement_enable(); /* left enabled on return; this function never calls enhancement_disable() */
+  shift_vertices3(vertex_ptrs); /* doubles the vertex coordinates in place and does not restore them; presumably these point into `vertexes` (TODO confirm in prepare_triangle) */
+  shift_triangle_area(); /* triangle_area was stored by prepare_triangle() for the undoubled coordinates */
+  render_triangle_p(psx_gpu, vertex_ptrs, current_command); /* second pass, targeting enhancement_buf_ptr */
+}
+
+static void do_quad_enhanced(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, /* Draw a quad as two triangles, vertexes[0..2] and vertexes[1..3], */
+ u32 current_command) /* each handled the same way as in do_triangle_enhanced(). */
+{
+  vertex_struct *vertex_ptrs[3]; /* reused for both triangles; written by prepare_triangle() on success */
+
+  if (prepare_triangle(psx_gpu, vertexes, vertex_ptrs)) { /* first triangle; skipped entirely if rejected */
+    if (!disable_main_render)
+      render_triangle_p(psx_gpu, vertex_ptrs, current_command); /* pass with the output settings active on entry */
+
+    enhancement_enable();
+    shift_vertices3(vertex_ptrs);
+    shift_triangle_area(); /* triangle_area was stored by prepare_triangle() for the undoubled coordinates */
+    render_triangle_p(psx_gpu, vertex_ptrs, current_command); /* doubled pass into the enhancement buffer */
+    unshift_vertices3(vertex_ptrs); /* restore coordinates before the second triangle is prepared; the two triangles presumably share vertexes[1] and vertexes[2] (TODO confirm) */
+  }
+  enhancement_disable(); /* back to vram_ptr and the saved viewport; runs whether or not the first triangle was drawn */
+  if (prepare_triangle(psx_gpu, &vertexes[1], vertex_ptrs)) { /* second triangle: vertexes[1..3] */
+    if (!disable_main_render)
+      render_triangle_p(psx_gpu, vertex_ptrs, current_command); /* unscaled pass */
+
+    enhancement_enable(); /* left enabled on return, as in do_triangle_enhanced() */
+    shift_vertices3(vertex_ptrs); /* not restored afterwards */
+    shift_triangle_area();
+    render_triangle_p(psx_gpu, vertex_ptrs, current_command); /* doubled pass into the enhancement buffer */
+  }
+}
+
+u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command)
+{
+  u32 current_command = 0, command_length;
+
+  u32 *list_start = list;
+  u32 *list_end = list + (size / 4);
+
+  psx_gpu->saved_viewport_start_x = psx_gpu->viewport_start_x;
+  psx_gpu->saved_viewport_start_y = psx_gpu->viewport_start_y;
+  psx_gpu->saved_viewport_end_x = psx_gpu->viewport_end_x;
+  psx_gpu->saved_viewport_end_y = psx_gpu->viewport_end_y;
+
+  for(; list < list_end; list += 1 + command_length)
+  {
+    s16 *list_s16 = (void *)list;
+    current_command = *list >> 24;
+    command_length = command_lengths[current_command];
+    if (list + 1 + command_length > list_end) {
+      current_command = (u32)-1;
+      break;
+    }
+
+    enhancement_disable();
+
+    switch(current_command)
+    {
+      case 0x00:
+        break;
+  
+      case 0x02:
+      {
+        u32 x = list_s16[2] & 0x3FF;
+        u32 y = list_s16[3] & 0x1FF;
+        u32 width = list_s16[4] & 0x3FF;
+        u32 height = list_s16[5] & 0x1FF;
+        u32 color = list[0] & 0xFFFFFF;
+
+        do_fill(psx_gpu, x, y, width, height, color);
+
+        psx_gpu->vram_out_ptr = psx_gpu->enhancement_buf_ptr;
+        x *= 2;
+        y *= 2;
+        width *= 2;
+        height *= 2;
+        if (width > 1024)
+          width = 1024;
+        render_block_fill(psx_gpu, color, x, y, width, height);
+        break;
+      }
+  
+      case 0x20 ... 0x23:
+      {
+        set_triangle_color(psx_gpu, list[0] & 0xFFFFFF);
+  
+        get_vertex_data_xy(0, 2);
+        get_vertex_data_xy(1, 4);
+        get_vertex_data_xy(2, 6);
+
+        do_triangle_enhanced(psx_gpu, vertexes, current_command);
+        break;
+      }
+  
+      case 0x24 ... 0x27:
+      {
+        set_clut(psx_gpu, list_s16[5]);
+        set_texture(psx_gpu, list_s16[9]);
+        set_triangle_color(psx_gpu, list[0] & 0xFFFFFF);
+  
+        get_vertex_data_xy_uv(0, 2);
+        get_vertex_data_xy_uv(1, 6);
+        get_vertex_data_xy_uv(2, 10);
+  
+        do_triangle_enhanced(psx_gpu, vertexes, current_command);
+        break;
+      }
+  
+      case 0x28 ... 0x2B:
+      {
+        set_triangle_color(psx_gpu, list[0] & 0xFFFFFF);
+  
+        get_vertex_data_xy(0, 2);
+        get_vertex_data_xy(1, 4);
+        get_vertex_data_xy(2, 6);
+        get_vertex_data_xy(3, 8);
+
+        do_quad_enhanced(psx_gpu, vertexes, current_command);
+        break;
+      }
+  
+      case 0x2C ... 0x2F:
+      {
+        set_clut(psx_gpu, list_s16[5]);
+        set_texture(psx_gpu, list_s16[9]);
+        set_triangle_color(psx_gpu, list[0] & 0xFFFFFF);
+  
+        get_vertex_data_xy_uv(0, 2);   
+        get_vertex_data_xy_uv(1, 6);   
+        get_vertex_data_xy_uv(2, 10);  
+        get_vertex_data_xy_uv(3, 14);
+  
+        do_quad_enhanced(psx_gpu, vertexes, current_command);
+        break;
+      }
+  
+      case 0x30 ... 0x33:
+      {
+        get_vertex_data_xy_rgb(0, 0);
+        get_vertex_data_xy_rgb(1, 4);
+        get_vertex_data_xy_rgb(2, 8);
+  
+        do_triangle_enhanced(psx_gpu, vertexes, current_command);
+        break;
+      }
+  
+      case 0x34:
+      case 0x35:
+      case 0x36:
+      case 0x37:
+      {
+        set_clut(psx_gpu, list_s16[5]);
+        set_texture(psx_gpu, list_s16[11]);
+  
+        get_vertex_data_xy_uv_rgb(0, 0);
+        get_vertex_data_xy_uv_rgb(1, 6);
+        get_vertex_data_xy_uv_rgb(2, 12);
+
+        do_triangle_enhanced(psx_gpu, vertexes, current_command);
+        break;
+      }
+  
+      case 0x38:
+      case 0x39:
+      case 0x3A:
+      case 0x3B:
+      {
+        get_vertex_data_xy_rgb(0, 0);
+        get_vertex_data_xy_rgb(1, 4);
+        get_vertex_data_xy_rgb(2, 8);
+        get_vertex_data_xy_rgb(3, 12);
+  
+        do_quad_enhanced(psx_gpu, vertexes, current_command);
+        break;
+      }
+  
+      case 0x3C:
+      case 0x3D:
+      case 0x3E:
+      case 0x3F:
+      {
+        set_clut(psx_gpu, list_s16[5]);
+        set_texture(psx_gpu, list_s16[11]);
+  
+        get_vertex_data_xy_uv_rgb(0, 0);
+        get_vertex_data_xy_uv_rgb(1, 6);
+        get_vertex_data_xy_uv_rgb(2, 12);
+        get_vertex_data_xy_uv_rgb(3, 18);
+
+        do_quad_enhanced(psx_gpu, vertexes, current_command);
+        break;
+      }
+  
+      case 0x40 ... 0x47:
+      {
+        vertexes[0].x = list_s16[2] + psx_gpu->offset_x;
+        vertexes[0].y = list_s16[3] + psx_gpu->offset_y;
+        vertexes[1].x = list_s16[4] + psx_gpu->offset_x;
+        vertexes[1].y = list_s16[5] + psx_gpu->offset_y;
+
+        render_line(psx_gpu, vertexes, current_command, list[0]);
+        break;
+      }
+  
+      case 0x48 ... 0x4F:
+      {
+        u32 num_vertexes = 1;
+        u32 *list_position = &(list[2]);
+        u32 xy = list[1];
+
+        vertexes[1].x = (xy & 0xFFFF) + psx_gpu->offset_x;
+        vertexes[1].y = (xy >> 16) + psx_gpu->offset_y;
+      
+        xy = *list_position;
+        while(1)
+        {
+          vertexes[0] = vertexes[1];
+
+          vertexes[1].x = (xy & 0xFFFF) + psx_gpu->offset_x;
+          vertexes[1].y = (xy >> 16) + psx_gpu->offset_y;
+
+          render_line(psx_gpu, vertexes, current_command, list[0]);
+
+          list_position++;
+          num_vertexes++;
+
+          if(list_position >= list_end)
+            break;
+
+          xy = *list_position;
+          if((xy & 0xF000F000) == 0x50005000)
+            break;
+        }
+
+        command_length += (num_vertexes - 2);
+        break;
+      }
+  
+      case 0x50 ... 0x57:
+      {
+        vertexes[0].r = list[0] & 0xFF;
+        vertexes[0].g = (list[0] >> 8) & 0xFF;
+        vertexes[0].b = (list[0] >> 16) & 0xFF;
+        vertexes[0].x = list_s16[2] + psx_gpu->offset_x;
+        vertexes[0].y = list_s16[3] + psx_gpu->offset_y;
+
+        vertexes[1].r = list[2] & 0xFF;
+        vertexes[1].g = (list[2] >> 8) & 0xFF;
+        vertexes[1].b = (list[2] >> 16) & 0xFF;
+        vertexes[1].x = list_s16[6] + psx_gpu->offset_x;
+        vertexes[1].y = list_s16[7] + psx_gpu->offset_y;
+
+        render_line(psx_gpu, vertexes, current_command, 0);
+        break;
+      }
+      case 0x58 ... 0x5F:
+      {
+        u32 num_vertexes = 1;
+        u32 *list_position = &(list[2]);
+        u32 color = list[0];
+        u32 xy = list[1];
+
+        vertexes[1].r = color & 0xFF;
+        vertexes[1].g = (color >> 8) & 0xFF;
+        vertexes[1].b = (color >> 16) & 0xFF;
+        vertexes[1].x = (xy & 0xFFFF) + psx_gpu->offset_x;
+        vertexes[1].y = (xy >> 16) + psx_gpu->offset_y;
+      
+        color = list_position[0];
+        while(1)
+        {
+          xy = list_position[1];
+
+          vertexes[0] = vertexes[1];
+
+          vertexes[1].r = color & 0xFF;
+          vertexes[1].g = (color >> 8) & 0xFF;
+          vertexes[1].b = (color >> 16) & 0xFF;
+          vertexes[1].x = (xy & 0xFFFF) + psx_gpu->offset_x;
+          vertexes[1].y = (xy >> 16) + psx_gpu->offset_y;
+
+          render_line(psx_gpu, vertexes, current_command, 0);
+
+          list_position += 2;
+          num_vertexes++;
+
+          if(list_position >= list_end)
+            break;
+
+          color = list_position[0];
+          if((color & 0xF000F000) == 0x50005000)
+            break;
+        }
+
+        command_length += ((num_vertexes - 2) * 2);
+        break;
+      }
+  
+      case 0x60 ... 0x63:
+      {        
+        u32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
+        u32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
+        u32 width = list_s16[4] & 0x3FF;
+        u32 height = list_s16[5] & 0x1FF;
+
+        render_sprite(psx_gpu, x, y, 0, 0, width, height, current_command, list[0]);
+        break;
+      }
+  
+      case 0x64 ... 0x67:
+      {        
+        u32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
+        u32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
+        u32 uv = list_s16[4];
+        u32 width = list_s16[6] & 0x3FF;
+        u32 height = list_s16[7] & 0x1FF;
+
+        set_clut(psx_gpu, list_s16[5]);
+
+        render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, width, height,
+         current_command, list[0]);
+        break;
+      }
+  
+      case 0x68:
+      case 0x69:
+      case 0x6A:
+      case 0x6B:
+      {
+        s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
+        s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
+
+        render_sprite(psx_gpu, x, y, 0, 0, 1, 1, current_command, list[0]);
+        break;
+      }
+  
+      case 0x70:
+      case 0x71:
+      case 0x72:
+      case 0x73:
+      {        
+        s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
+        s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
+
+        render_sprite(psx_gpu, x, y, 0, 0, 8, 8, current_command, list[0]);
+        break;
+      }
+  
+      case 0x74:
+      case 0x75:
+      case 0x76:
+      case 0x77:
+      {        
+        s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
+        s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
+        u32 uv = list_s16[4];
+
+        set_clut(psx_gpu, list_s16[5]);
+
+        render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, 8, 8,
+         current_command, list[0]);
+        break;
+      }
+  
+      case 0x78:
+      case 0x79:
+      case 0x7A:
+      case 0x7B:
+      {        
+        s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
+        s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
+
+        render_sprite(psx_gpu, x, y, 0, 0, 16, 16, current_command, list[0]);
+        break;
+      }
+  
+      case 0x7C:
+      case 0x7D:
+      case 0x7E:
+      case 0x7F:
+      {        
+        s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
+        s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
+        u32 uv = list_s16[4];
+
+        set_clut(psx_gpu, list_s16[5]);
+
+        render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, 16, 16,
+         current_command, list[0]);
+        break;
+      }
+  
+      case 0x80:          //  vid -> vid
+        render_block_move(psx_gpu, list_s16[2] & 0x3FF, list_s16[3] & 0x1FF,
+         list_s16[4] & 0x3FF, list_s16[5] & 0x1FF,
+         ((list_s16[6] - 1) & 0x3FF) + 1, ((list_s16[7] - 1) & 0x1FF) + 1);
+        break;
+#ifdef PCSX
+      case 0xA0:          //  sys -> vid
+      case 0xC0:          //  vid -> sys
+        goto breakloop;
+#else
+      case 0xA0:          //  sys -> vid
+      {
+        u32 load_x = list_s16[2] & 0x3FF;
+        u32 load_y = list_s16[3] & 0x1FF;
+        u32 load_width = list_s16[4] & 0x3FF;
+        u32 load_height = list_s16[5] & 0x1FF;
+        u32 load_size = load_width * load_height;
+  
+        command_length += load_size / 2;
+
+        if(load_size & 1)
+          command_length++;
+
+        render_block_copy(psx_gpu, (u16 *)&(list_s16[6]), load_x, load_y,
+         load_width, load_height, load_width);
+        break;
+      }
+
+      case 0xC0:          //  vid -> sys
+        break;
+#endif
+
+      case 0xE1:
+        set_texture(psx_gpu, list[0] & 0x1FF);
+
+        if(list[0] & (1 << 9))
+          psx_gpu->render_state_base |= RENDER_STATE_DITHER;
+        else
+          psx_gpu->render_state_base &= ~RENDER_STATE_DITHER;
+
+        psx_gpu->display_area_draw_enable = (list[0] >> 10) & 0x1;
+        SET_Ex(1, list[0]);
+        break;
+  
+      case 0xE2:
+      {
+        // TODO: Clean
+        u32 texture_window_settings = list[0];
+        u32 tmp, x, y, w, h;
+
+        if(texture_window_settings != psx_gpu->texture_window_settings)
+        {
+          tmp = (texture_window_settings & 0x1F) | 0x20;
+          for(w = 8; (tmp & 1) == 0; tmp >>= 1, w <<= 1);
+
+          tmp = ((texture_window_settings >> 5) & 0x1f) | 0x20;
+          for (h = 8; (tmp & 1) == 0; tmp >>= 1, h <<= 1);
+
+          tmp = 32 - (w >> 3);
+          x = ((texture_window_settings >> 10) & tmp) << 3;
+
+          tmp = 32 - (h >> 3);
+          y = ((texture_window_settings >> 15) & tmp) << 3;
+
+          flush_render_block_buffer(psx_gpu);
+          
+          psx_gpu->texture_window_settings = texture_window_settings;
+          psx_gpu->texture_window_x = x;
+          psx_gpu->texture_window_y = y;
+          psx_gpu->texture_mask_width = w - 1;
+          psx_gpu->texture_mask_height = h - 1;
+
+          update_texture_ptr(psx_gpu);
+        }
+        SET_Ex(2, list[0]);
+        break;
+      }
+  
+      case 0xE3:
+        psx_gpu->viewport_start_x = list[0] & 0x3FF;
+        psx_gpu->viewport_start_y = (list[0] >> 10) & 0x1FF;
+        psx_gpu->saved_viewport_start_x = psx_gpu->viewport_start_x;
+        psx_gpu->saved_viewport_start_y = psx_gpu->viewport_start_y;
+
+#ifdef TEXTURE_CACHE_4BPP
+        psx_gpu->viewport_mask =
+         texture_region_mask(psx_gpu->viewport_start_x,
+         psx_gpu->viewport_start_y, psx_gpu->viewport_end_x,
+         psx_gpu->viewport_end_y);
+#endif
+        SET_Ex(3, list[0]);
+        break;
+  
+      case 0xE4:
+        psx_gpu->viewport_end_x = list[0] & 0x3FF;
+        psx_gpu->viewport_end_y = (list[0] >> 10) & 0x1FF;
+        psx_gpu->saved_viewport_end_x = psx_gpu->viewport_end_x;
+        psx_gpu->saved_viewport_end_y = psx_gpu->viewport_end_y;
+
+#ifdef TEXTURE_CACHE_4BPP
+        psx_gpu->viewport_mask =
+         texture_region_mask(psx_gpu->viewport_start_x,
+         psx_gpu->viewport_start_y, psx_gpu->viewport_end_x,
+         psx_gpu->viewport_end_y);
+#endif
+        SET_Ex(4, list[0]);
+        break;
+  
+      case 0xE5:
+      {
+        s32 offset_x = list[0] << 21;
+        s32 offset_y = list[0] << 10;
+        psx_gpu->offset_x = offset_x >> 21;
+        psx_gpu->offset_y = offset_y >> 21; 
+  
+        SET_Ex(5, list[0]);
+        break;
+      }
+
+      case 0xE6:
+      {
+        u32 mask_settings = list[0];
+        u16 mask_msb = mask_settings << 15;
+
+        if(list[0] & 0x2)
+          psx_gpu->render_state_base |= RENDER_STATE_MASK_EVALUATE;
+        else
+          psx_gpu->render_state_base &= ~RENDER_STATE_MASK_EVALUATE;
+
+        if(mask_msb != psx_gpu->mask_msb)
+        {
+          flush_render_block_buffer(psx_gpu);
+          psx_gpu->mask_msb = mask_msb;
+        }
+
+        SET_Ex(6, list[0]);
+        break;
+      }
+  
+      default:
+        break;
+    }
+  }
+
+#ifdef PCSX
+breakloop:
+#endif
+enhancement_disable();
+  if (last_command != NULL)
+    *last_command = current_command;
+  return list - list_start;
+}
+
+// vim:shiftwidth=2:expandtab
index ff31c27..8610c83 100644 (file)
@@ -27,7 +27,12 @@ static psx_gpu_struct egpu __attribute__((aligned(256)));
 
 int do_cmd_list(uint32_t *list, int count, int *last_cmd)
 {
-  int ret = gpu_parse(&egpu, list, count * 4, (u32 *)last_cmd);
+  int ret;
+
+  if (gpu.state.enhancement_active)
+    ret = gpu_parse_enhanced(&egpu, list, count * 4, (u32 *)last_cmd);
+  else
+    ret = gpu_parse(&egpu, list, count * 4, (u32 *)last_cmd);
 
   ex_regs[1] &= ~0x1ff;
   ex_regs[1] |= egpu.texture_settings & 0x1ff;
@@ -38,6 +43,7 @@ int renderer_init(void)
 {
   initialize_psx_gpu(&egpu, gpu.vram);
   ex_regs = gpu.ex_regs;
+  gpu.state.enhancement_available = 1;
   return 0;
 }
 
@@ -65,6 +71,10 @@ void renderer_set_interlace(int enable, int is_odd)
     egpu.interlace_mode |= RENDER_INTERLACE_ODD;
 }
 
+#include "../../frontend/plugin_lib.h"
+
 void renderer_set_config(const struct rearmed_cbs *cbs)
 {
+  egpu.enhancement_buf_ptr = gpu.enhancement_bufer; /* renderer's enhancement buffer pointer comes from the gpu state ("bufer" is the field's spelling) */
+  disable_main_render = cbs->gpu_neon.enhancement_no_main; /* copy the frontend's enhancement_no_main option; NOTE(review): presumably skips the normal render pass - confirm at the point of use */
 }