psx_gpu: texture cache fix, updates
author Exophase <exophase@gmail.com>
Fri, 23 Dec 2011 00:47:19 +0000 (02:47 +0200)
committer notaz <notasas@gmail.com>
Fri, 23 Dec 2011 00:47:26 +0000 (02:47 +0200)
plugins/gpu_neon/psx_gpu/psx_gpu.c
plugins/gpu_neon/psx_gpu/psx_gpu.h
plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
plugins/gpu_neon/psx_gpu/psx_gpu_main.c
plugins/gpu_neon/psx_gpu/psx_gpu_parse.c

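diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c
index 84848f8..2acfedc 100644
--- a/plugins/gpu_neon/psx_gpu/psx_gpu.c
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c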
@@ -20,7 +20,6 @@
 
 u32 span_pixels = 0;
 u32 span_pixel_blocks = 0;
-u32 span_pixel_blocks_unaligned = 0;
 u32 spans = 0;
 u32 triangles = 0;
 u32 sprites = 0;
@@ -39,9 +38,6 @@ u32 texel_blocks_8bpp = 0;
 u32 texel_blocks_16bpp = 0;
 u32 texel_blocks_untextured = 0;
 u32 blend_blocks = 0;
-u32 untextured_pixels = 0;
-u32 blend_pixels = 0;
-u32 transparent_pixels = 0;
 u32 render_buffer_flushes = 0;
 u32 state_changes = 0;
 u32 left_split_triangles = 0;
@@ -49,8 +45,7 @@ u32 flat_triangles = 0;
 u32 clipped_triangles = 0;
 u32 zero_block_spans = 0;
 u32 texture_cache_loads = 0;
-u32 false_modulated_triangles = 0;
-u32 false_modulated_sprites = 0;
+u32 false_modulated_blocks = 0;
 
 u32 reciprocal_table[512];
 
@@ -241,6 +236,7 @@ u32 invalidate_texture_cache_region_viewport(psx_gpu_struct *psx_gpu, u32 x1,
 {
   u32 mask = texture_region_mask(x1, y1, x2, y2) &
    psx_gpu->viewport_mask;
+
   psx_gpu->dirty_textures_4bpp_mask |= mask;
   psx_gpu->dirty_textures_8bpp_mask |= mask;
   psx_gpu->dirty_textures_8bpp_alternate_mask |= mask;
@@ -257,7 +253,7 @@ void update_texture_8bpp_cache_slice(psx_gpu_struct *psx_gpu,
 void update_texture_4bpp_cache(psx_gpu_struct *psx_gpu)
 {
   u32 current_texture_page = psx_gpu->current_texture_page;
-  u8 *texture_page_ptr = psx_gpu->texture_page_ptr;
+  u8 *texture_page_ptr = psx_gpu->texture_page_base;
   u16 *vram_ptr = psx_gpu->vram_ptr;
 
   u32 texel_block;
@@ -319,7 +315,7 @@ void update_texture_4bpp_cache(psx_gpu_struct *psx_gpu)
 void update_texture_8bpp_cache_slice(psx_gpu_struct *psx_gpu,
  u32 texture_page)
 {
-  u16 *texture_page_ptr = psx_gpu->texture_page_ptr;
+  u16 *texture_page_ptr = psx_gpu->texture_page_base;
   u16 *vram_ptr = psx_gpu->vram_ptr;
 
   u32 tile_x, tile_y;
@@ -413,8 +409,10 @@ void flush_render_block_buffer(psx_gpu_struct *psx_gpu)
     render_block_handler->shade_blocks(psx_gpu);
     render_block_handler->blend_blocks(psx_gpu);
 
+#ifdef PROFILE
     span_pixel_blocks += psx_gpu->num_blocks;
     render_buffer_flushes++;
+#endif
 
     psx_gpu->num_blocks = 0;
   }
@@ -1748,6 +1746,8 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
   }                                                                            \
 
 #define setup_blocks_add_blocks_direct()                                       \
+  texel_blocks_untextured += span_num_blocks;                                  \
+  span_pixel_blocks += span_num_blocks                                         \
 
 
 #define setup_blocks_builder(shading, texturing, dithering, sw, target)        \
@@ -1787,7 +1787,6 @@ void setup_blocks_##shading##_##texturing##_##dithering##_##sw##_##target(     \
       s32 pixel_span = span_num_blocks * 8;                                    \
       pixel_span -= __builtin_popcount(span_edge_data->right_mask & 0xFF);     \
       span_pixels += pixel_span;                                               \
-      span_pixel_blocks_unaligned += (pixel_span + 7) / 8;                     \
                                                                                \
       span_num_blocks--;                                                       \
       while(span_num_blocks)                                                   \
@@ -2017,16 +2016,29 @@ void texture_blocks_16bpp(psx_gpu_struct *psx_gpu)
 }                                                                              \
 
 
-#define shade_blocks_textured_modulated_shaded_primitive_load()                \
+#define shade_blocks_textured_false_modulated_check_dithered(target)           \
+
+#define shade_blocks_textured_false_modulated_check_undithered(target)         \
+  if(psx_gpu->triangle_color == 0x808080)                                      \
+  {                                                                            \
+                                                                               \
+    shade_blocks_textured_unmodulated_##target(psx_gpu);                       \
+    false_modulated_blocks += num_blocks;                                      \
+    return;                                                                    \
+  }                                                                            \
+
+
+#define shade_blocks_textured_modulated_shaded_primitive_load(dithering,       \
+ target)                                                                       \
 
-#define shade_blocks_textured_modulated_unshaded_primitive_load()              \
+#define shade_blocks_textured_modulated_unshaded_primitive_load(dithering,     \
+ target)                                                                       \
 {                                                                              \
   u32 color = psx_gpu->triangle_color;                                         \
   dup_8x8b(colors_r, color);                                                   \
   dup_8x8b(colors_g, color >> 8);                                              \
   dup_8x8b(colors_b, color >> 16);                                             \
-  if(psx_gpu->triangle_color == 0x808080)                                      \
-    false_modulated_triangles++;                                               \
+  shade_blocks_textured_false_modulated_check_##dithering(target);             \
 }                                                                              \
 
 #define shade_blocks_textured_modulated_shaded_block_load()                    \
@@ -2091,7 +2103,8 @@ void shade_blocks_##shading##_textured_modulated_##dithering##_##target(       \
                                                                                \
   dup_8x16b(d128_0x8000, 0x8000);                                              \
                                                                                \
-  shade_blocks_textured_modulated_##shading##_primitive_load();                \
+  shade_blocks_textured_modulated_##shading##_primitive_load(dithering,        \
+   target);                                                                    \
                                                                                \
   while(num_blocks)                                                            \
   {                                                                            \
@@ -2157,6 +2170,9 @@ void shade_blocks_unshaded_textured_modulated_dithered_indirect(psx_gpu_struct
 void shade_blocks_unshaded_textured_modulated_undithered_indirect(psx_gpu_struct
  *psx_gpu);
 
+void shade_blocks_textured_unmodulated_indirect(psx_gpu_struct *psx_gpu);
+void shade_blocks_textured_unmodulated_direct(psx_gpu_struct *psx_gpu);
+
 #ifndef NEON_BUILD
 
 shade_blocks_textured_modulated_builder(shaded, dithered, direct);
@@ -2204,8 +2220,38 @@ void shade_blocks_textured_unmodulated_##target(psx_gpu_struct *psx_gpu)       \
   }                                                                            \
 }                                                                              \
 
-void shade_blocks_textured_unmodulated_indirect(psx_gpu_struct *psx_gpu);
-void shade_blocks_textured_unmodulated_direct(psx_gpu_struct *psx_gpu);
+#define shade_blocks_textured_unmodulated_dithered_builder(target)             \
+void shade_blocks_textured_unmodulated_dithered_##target(psx_gpu_struct        \
+ *psx_gpu)                                                                     \
+{                                                                              \
+  block_struct *block = psx_gpu->blocks;                                       \
+  u32 num_blocks = psx_gpu->num_blocks;                                        \
+  vec_8x16u draw_mask;                                                         \
+  vec_8x16u test_mask = psx_gpu->test_mask;                                    \
+  u32 draw_mask_bits;                                                          \
+                                                                               \
+  vec_8x16u pixels;                                                            \
+  shade_blocks_load_msb_mask_##target();                                       \
+                                                                               \
+  while(num_blocks)                                                            \
+  {                                                                            \
+    vec_8x16u zero_mask;                                                       \
+                                                                               \
+    draw_mask_bits = block->draw_mask_bits;                                    \
+    dup_8x16b(draw_mask, draw_mask_bits);                                      \
+    tst_8x16b(draw_mask, draw_mask, test_mask);                                \
+                                                                               \
+    pixels = block->texels;                                                    \
+                                                                               \
+    cmpeqz_8x16b(zero_mask, pixels);                                           \
+    or_8x16b(zero_mask, draw_mask, zero_mask);                                 \
+                                                                               \
+    shade_blocks_store_##target(zero_mask, pixels);                            \
+                                                                               \
+    num_blocks--;                                                              \
+    block++;                                                                   \
+  }                                                                            \
+}                                                                              \
 
 #ifndef NEON_BUILD
 
@@ -2773,11 +2819,15 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
 
   triangle_area = triangle_signed_area_x2(a->x, a->y, b->x, b->y, c->x, c->y);
 
+#ifdef PROFILE
   triangles++;
+#endif
 
   if(triangle_area == 0)
   {
+#ifdef PROFILE
     trivial_rejects++;
+#endif
     return;
   }
 
@@ -2797,7 +2847,9 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
 
   if((y_bottom - y_top) >= 512)
   {
+#ifdef PROFILE
     trivial_rejects++;
+#endif
     return;
   }
 
@@ -2821,14 +2873,18 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
 
   if((c->x - a->x) >= 1024)
   {
+#ifdef PROFILE
     trivial_rejects++;
+#endif
     return;
   }
 
   if(invalidate_texture_cache_region_viewport(psx_gpu, a->x, y_top, c->x,
    y_bottom) == 0)
   {
+#ifdef PROFILE
     trivial_rejects++;
+#endif
     return;
   }
 
@@ -2922,7 +2978,9 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
       break;
   }
 
+#ifdef PROFILE
   spans += psx_gpu->num_spans;
+#endif
 
   u32 render_state = flags &
    (RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND | 
@@ -2934,7 +2992,9 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
   {
     psx_gpu->render_state = render_state;
     flush_render_block_buffer(psx_gpu);
+#ifdef PROFILE
     state_changes++;
+#endif
   }
 
   psx_gpu->primitive_type = PRIMITIVE_TYPE_TRIANGLE;
@@ -3369,7 +3429,7 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
   u32 left_offset = u & 0x7;
   u32 width_rounded = width + left_offset + 7;
 
-  u16 *fb_ptr = psx_gpu->vram_ptr + (y * 1024) + (x - left_offset);
+  u16 *fb_ptr = psx_gpu->vram_ptr + (y * 1024) + (s32)(x - left_offset);
   u32 right_width = width_rounded & 0x7;
   u32 block_width = width_rounded / 8;
   u32 fb_ptr_pitch = (1024 + 8) - (block_width * 8);
@@ -3519,7 +3579,10 @@ void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
   {
     blocks_remaining = block_width - 1;
     num_blocks += block_width;
+
+#ifdef PROFILE
     sprite_blocks += block_width;
+#endif
 
     if(num_blocks > MAX_BLOCKS)
     {
@@ -3654,6 +3717,10 @@ void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v,
   s32 x_right = x + width - 1;
   s32 y_bottom = y + height - 1;
 
+#ifdef PROFILE
+  sprites++;
+#endif
+
   if(invalidate_texture_cache_region_viewport(psx_gpu, x, y, x_right,
    y_bottom) == 0)
   {
@@ -3685,10 +3752,10 @@ void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v,
   if((width <= 0) || (height <= 0))
     return;
 
-  sprites++;
-
+#ifdef PROFILE
   span_pixels += width * height;
   spans += height;
+#endif
 
   u32 render_state = flags &
    (RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND |
@@ -3701,7 +3768,9 @@ void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v,
   {
     psx_gpu->render_state = render_state;
     flush_render_block_buffer(psx_gpu);
+#ifdef PROFILE
     state_changes++;
+#endif
   }
 
   psx_gpu->primitive_type = PRIMITIVE_TYPE_SPRITE;
@@ -4032,7 +4101,9 @@ void render_line(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 flags,
 
   u32 control_mask;
 
+#ifdef PROFILE
   lines++;
+#endif
 
   if(vertex_a->x >= vertex_b->x)
   {
@@ -4276,8 +4347,6 @@ void render_block_copy(psx_gpu_struct *psx_gpu, u16 *source, u32 x, u32 y,
 
   invalidate_texture_cache_region(psx_gpu, x, y, x + width - 1, y + height - 1);
 
-  //printf("copy for %d, %d\n", width, height);
-
   for(draw_y = 0; draw_y < height; draw_y++)
   {
     for(draw_x = 0; draw_x < width; draw_x++)
@@ -4329,15 +4398,6 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram)
 
   psx_gpu->test_mask = test_mask;
 
-  psx_gpu->pixel_count_mode = 0;
-  psx_gpu->pixel_compare_mode = 0;
-
-  psx_gpu->vram_pixel_counts_a = malloc(sizeof(u8) * 1024 * 512);
-  psx_gpu->vram_pixel_counts_b = malloc(sizeof(u8) * 1024 * 512);
-  memset(psx_gpu->vram_pixel_counts_a, 0, sizeof(u8) * 1024 * 512);
-  memset(psx_gpu->vram_pixel_counts_b, 0, sizeof(u8) * 1024 * 512);
-  psx_gpu->compare_vram = malloc(sizeof(u16) * 1024 * 512);
-
   psx_gpu->dirty_textures_4bpp_mask = 0xFFFFFFFF;
   psx_gpu->dirty_textures_8bpp_mask = 0xFFFFFFFF;
   psx_gpu->dirty_textures_8bpp_alternate_mask = 0xFFFFFFFF;
@@ -4354,6 +4414,7 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram)
 
   psx_gpu->vram_ptr = vram;
 
+  psx_gpu->texture_page_base = psx_gpu->vram_ptr;
   psx_gpu->texture_page_ptr = psx_gpu->vram_ptr;
   psx_gpu->clut_ptr = psx_gpu->vram_ptr;
 
@@ -4374,7 +4435,6 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram)
   // d1: (2 3 6 7): y0
   // d2: (4 5 6 7): x0 ^ y0
 
-
   psx_gpu->dither_table[0] = dither_table_row(-4, 0, -3, 1);
   psx_gpu->dither_table[1] = dither_table_row(2, -2, 3, -1);
   psx_gpu->dither_table[2] = dither_table_row(-3, 1, -4, 0);
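diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.h b/plugins/gpu_neon/psx_gpu/psx_gpu.h
index 49425ce..137dda9 100644
--- a/plugins/gpu_neon/psx_gpu/psx_gpu.h
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu.h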
@@ -130,12 +130,11 @@ typedef struct
   u32 dirty_textures_8bpp_alternate_mask;
 
   u32 triangle_color;
-  u32 primitive_color;
-
   u32 dither_table[4];
 
   struct render_block_handler_struct *render_block_handler;
   void *texture_page_ptr;
+  void *texture_page_base;
   u16 *clut_ptr;
   u16 *vram_ptr;
 
@@ -189,13 +188,6 @@ typedef struct
   u8 texture_4bpp_cache[32][256 * 256];
   u8 texture_8bpp_even_cache[16][256 * 256];
   u8 texture_8bpp_odd_cache[16][256 * 256];
-
-  u32 pixel_count_mode;
-  u32 pixel_compare_mode;
-
-  u8 *vram_pixel_counts_a;
-  u8 *vram_pixel_counts_b;
-  u16 *compare_vram;
 } psx_gpu_struct;
 
 typedef struct __attribute__((aligned(16)))
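diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
index 54605b8..fd99798 100644
--- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S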
 #define psx_gpu_dirty_textures_8bpp_mask_offset           172
 #define psx_gpu_dirty_textures_8bpp_alternate_mask_offset 176
 #define psx_gpu_triangle_color_offset                     180
-#define psx_gpu_primitive_color_offset                    184
-#define psx_gpu_dither_table_offset                       188
-#define psx_gpu_render_block_handler_offset               204
-#define psx_gpu_texture_page_ptr_offset                   208
+#define psx_gpu_dither_table_offset                       184
+#define psx_gpu_render_block_handler_offset               200
+#define psx_gpu_texture_page_ptr_offset                   204
+#define psx_gpu_texture_page_base_offset                  208
 #define psx_gpu_clut_ptr_offset                           212
 #define psx_gpu_vram_ptr_offset                           216
 
@@ -1955,6 +1955,8 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_direct)
   vdup.u16 colors, color
 
   add span_edge_data, psx_gpu, #psx_gpu_span_edge_data_offset
+  orr color, color, lsl #16
+
 
  0:
   ldrh span_num_blocks, [ span_edge_data, #edge_data_num_blocks_offset ]
@@ -1981,12 +1983,21 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_direct)
 
  3:
   ldrb right_mask, [ span_edge_data, #edge_data_right_mask_offset ]
-  eor right_mask, right_mask, #0xFF
 
- 4:
-  strh color, [ fb_ptr ], #2
-  movs right_mask, right_mask, lsr #1
-  bne 4b
+  cmp right_mask, #0x0
+  beq 5f
+
+  tst right_mask, #0xF
+  streq color, [ fb_ptr ], #4
+  moveq right_mask, right_mask, lsr #4
+  streq color, [ fb_ptr ], #4
+
+  tst right_mask, #0x3
+  streq color, [ fb_ptr ], #4
+  moveq right_mask, right_mask, lsr #2
+
+  tst right_mask, #0x1
+  streqh color, [ fb_ptr ]
 
  1:
   add span_edge_data, span_edge_data, #8
@@ -1997,6 +2008,9 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_direct)
 
   ldmia sp!, { r4 - r11, pc }
                                                                            
+ 5:
+  vst1.u32 { colors }, [ fb_ptr ]
+  bal 1b
 
 
 #undef c_64
@@ -2337,6 +2351,7 @@ setup_blocks_shaded_untextured_indirect_builder(dithered)
 
 #define draw_mask                                         q0
 #define pixels_low                                        d16
+#define pixels_high                                       d17
 
 
 
@@ -2500,23 +2515,67 @@ function(setup_blocks_shaded_untextured_##dithering##_unswizzled_direct)       \
  3:                                                                            \
   setup_blocks_shaded_untextured_dither_a_##dithering();                       \
                                                                                \
-  ldrb right_mask, [ span_edge_data, #edge_data_right_mask_offset ];           \
+  ldrh right_mask, [ span_edge_data, #edge_data_right_mask_offset ];           \
   setup_blocks_shaded_untextured_dither_b_##dithering();                       \
                                                                                \
   vshr.u8 r_whole_8, r_whole_8, #3;                                            \
+  rbit right_mask, right_mask;                                                 \
   vmov pixels, msb_mask;                                                       \
   vbic.u8 gb_whole_8, gb_whole_8, d128_0x7;                                    \
-  eor right_mask, right_mask, #0xFF;                                           \
+  clz right_mask, right_mask;                                                  \
                                                                                \
   vmlal.u8 pixels, r_whole_8, d64_1;                                           \
   vmlal.u8 pixels, g_whole_8, d64_4;                                           \
   vmlal.u8 pixels, b_whole_8, d64_128;                                         \
                                                                                \
+  ldr pc, [ pc, right_mask, lsl #2 ];                                          \
+  nop;                                                                         \
+  nop;                                                                         \
+  .word 4f;                                                                    \
+  .word 5f;                                                                    \
+  .word 6f;                                                                    \
+  .word 7f;                                                                    \
+  .word 8f;                                                                    \
+  .word 9f;                                                                    \
+  .word 10f;                                                                   \
+  .word 11f;                                                                   \
+                                                                               \
  4:                                                                            \
-  vst1.u16 { pixels_low[0] }, [ fb_ptr ]!;                                     \
-  vext.16 pixels, pixels, #1;                                                  \
-  movs right_mask, right_mask, lsr #1;                                         \
-  bne 4b;                                                                      \
+  vst1.u16 { pixels_low[0] }, [ fb_ptr ];                                      \
+  bal 1f;                                                                      \
+                                                                               \
+ 5:                                                                            \
+  vst1.u32 { pixels_low[0] }, [ fb_ptr ];                                      \
+  bal 1f;                                                                      \
+                                                                               \
+ 6:                                                                            \
+  vst1.u32 { pixels_low[0] }, [ fb_ptr ]!;                                     \
+  vst1.u16 { pixels_low[2] }, [ fb_ptr ];                                      \
+  bal 1f;                                                                      \
+                                                                               \
+ 7:                                                                            \
+  vst1.u32 { pixels_low }, [ fb_ptr ];                                         \
+  bal 1f;                                                                      \
+                                                                               \
+ 8:                                                                            \
+  vst1.u32 { pixels_low }, [ fb_ptr ]!;                                        \
+  vst1.u16 { pixels_high[0] }, [ fb_ptr ];                                     \
+  bal 1f;                                                                      \
+                                                                               \
+ 9:                                                                            \
+  vst1.u32 { pixels_low }, [ fb_ptr ]!;                                        \
+  vst1.u32 { pixels_high[0] }, [ fb_ptr ]!;                                    \
+  bal 1f;                                                                      \
+                                                                               \
+ 10:                                                                           \
+  vst1.u32 { pixels_low }, [ fb_ptr ]!;                                        \
+  vst1.u32 { pixels_high[0] }, [ fb_ptr ]!;                                    \
+  vst1.u16 { pixels_high[2] }, [ fb_ptr ];                                     \
+  bal 1f;                                                                      \
+                                                                               \
+ 11:                                                                           \
+  vst1.u32 { pixels }, [ fb_ptr ];                                             \
+  bal 1f;                                                                      \
                                                                                \
  1:                                                                            \
   add span_uvrg_offset, span_uvrg_offset, #16;                                 \
@@ -2957,6 +3016,8 @@ function(texture_blocks_16bpp)
 #define psx_gpu                                  r0
 #define num_blocks                               r1
 #define color_ptr                                r2
+#define colors_scalar                            r2
+#define colors_scalar_compare                    r3
 #define mask_msb_ptr                             r2
 
 #define block_ptr_load_a                         r0
@@ -3013,9 +3074,21 @@ function(texture_blocks_16bpp)
   add mask_msb_ptr, psx_gpu, #psx_gpu_mask_msb_offset;                         \
   vld1.u16 { msb_mask_low[], msb_mask_high[] }, [ mask_msb_ptr, :16 ]          \
 
-#define shade_blocks_textured_modulated_prologue_shaded()                      \
 
-#define shade_blocks_textured_modulated_prologue_unshaded()                    \
+#define shade_blocks_textured_modulated_prologue_shaded(dithering, target)     \
+  
+#define shade_blocks_textured_false_modulation_check_undithered(target)        \
+  ldr colors_scalar, [ psx_gpu, #psx_gpu_triangle_color_offset ];              \
+  movw colors_scalar_compare, #0x8080;                                         \
+                                                                               \
+  movt colors_scalar_compare, #0x80;                                           \
+  cmp colors_scalar, colors_scalar_compare;                                    \
+  beq shade_blocks_textured_unmodulated_##target                               \
+
+#define shade_blocks_textured_false_modulation_check_dithered(target)          \
+
+#define shade_blocks_textured_modulated_prologue_unshaded(dithering, target)   \
+  shade_blocks_textured_false_modulation_check_##dithering(target);            \
   add color_ptr, psx_gpu, #psx_gpu_triangle_color_offset;                      \
   vld1.u32 { colors_r[] }, [ color_ptr, :32 ];                                 \
   vdup.u8 colors_g, colors_r[1];                                               \
@@ -3086,13 +3159,13 @@ function(texture_blocks_16bpp)
 .align 3;                                                                      \
                                                                                \
 function(shade_blocks_##shading##_textured_modulated_##dithering##_##target)   \
+  shade_blocks_textured_modulated_prologue_##shading(dithering, target);       \
   stmdb sp!, { r4 - r5, lr };                                                  \
   ldrh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ];                    \
                                                                                \
   vld1.u32 { test_mask }, [ psx_gpu, :128 ];                                   \
                                                                                \
   shade_blocks_textured_modulated_prologue_##target();                         \
-  shade_blocks_textured_modulated_prologue_##shading();                        \
                                                                                \
   add block_ptr_load_a, psx_gpu, #psx_gpu_blocks_offset;                       \
   mov c_32, #32;                                                               \
@@ -5271,7 +5344,7 @@ function(update_texture_4bpp_cache)
 
   ldrb current_texture_page, [ psx_gpu, #psx_gpu_current_texture_page_offset ]
 
-  ldr texture_page_ptr, [ psx_gpu, #psx_gpu_texture_page_ptr_offset ]
+  ldr texture_page_ptr, [ psx_gpu, #psx_gpu_texture_page_base_offset ]
   ldr vram_ptr_a, [ psx_gpu, #psx_gpu_vram_ptr_offset ]
 
   and current_texture_page_x, current_texture_page, #0xF
@@ -5375,7 +5448,7 @@ function(update_texture_8bpp_cache_slice)
   ldrb current_texture_page, [ psx_gpu, #psx_gpu_current_texture_page_offset ]
   ldr vram_ptr_a, [ psx_gpu, #psx_gpu_vram_ptr_offset ]
 
-  ldr texture_page_ptr, [ psx_gpu, #psx_gpu_texture_page_ptr_offset ]
+  ldr texture_page_ptr, [ psx_gpu, #psx_gpu_texture_page_base_offset ]
   mov tile_y, #16
 
   and texture_page_x, texture_page, #0xF
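diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_main.c b/plugins/gpu_neon/psx_gpu/psx_gpu_main.c
index 1eadc79..0f85604 100644
--- a/plugins/gpu_neon/psx_gpu/psx_gpu_main.c
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu_main.c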
@@ -20,7 +20,6 @@
 
 extern u32 span_pixels;
 extern u32 span_pixel_blocks;
-extern u32 span_pixel_blocks_unaligned;
 extern u32 spans;
 extern u32 triangles;
 extern u32 sprites;
@@ -38,9 +37,6 @@ extern u32 texel_blocks_8bpp;
 extern u32 texel_blocks_16bpp;
 extern u32 texel_blocks_untextured;
 extern u32 blend_blocks;
-extern u32 untextured_pixels;
-extern u32 blend_pixels;
-extern u32 transparent_pixels;
 extern u32 render_buffer_flushes;
 extern u32 state_changes;
 extern u32 trivial_rejects;
@@ -49,8 +45,7 @@ extern u32 flat_triangles;
 extern u32 clipped_triangles;
 extern u32 zero_block_spans;
 extern u32 texture_cache_loads;
-extern u32 false_modulated_triangles;
-extern u32 false_modulated_sprites;
+extern u32 false_modulated_blocks;
 
 static u32 mismatches;
 
@@ -64,7 +59,7 @@ typedef struct
 static gpu_dump_struct state;
 
 psx_gpu_struct __attribute__((aligned(256))) _psx_gpu;
-u16 __attribute__((aligned(256))) _vram[1024 * 512];
+u16 __attribute__((aligned(256))) _vram[(1024 * 512) + 1024];
 
 #define percent_of(numerator, denominator)                                     \
   ((((double)(numerator)) / (denominator)) * 100.0)                            \
@@ -81,7 +76,6 @@ void clear_stats(void)
   lines = 0;
   span_pixels = 0;
   span_pixel_blocks = 0;
-  span_pixel_blocks_unaligned = 0;
   spans = 0;
   texels_4bpp = 0;
   texels_8bpp = 0;
@@ -91,9 +85,6 @@ void clear_stats(void)
   texel_blocks_8bpp = 0;
   texel_blocks_16bpp = 0;
   blend_blocks = 0;
-  untextured_pixels = 0;
-  blend_pixels = 0;
-  transparent_pixels = 0;
   render_buffer_flushes = 0;
   state_changes = 0;
   trivial_rejects = 0;
@@ -102,8 +93,7 @@ void clear_stats(void)
   clipped_triangles = 0;
   zero_block_spans = 0;
   texture_cache_loads = 0;
-  false_modulated_triangles = 0;
-  false_modulated_sprites = 0;
+  false_modulated_blocks = 0;
 }
 
 void update_screen(psx_gpu_struct *psx_gpu, SDL_Surface *screen)
@@ -165,7 +155,7 @@ int main(int argc, char *argv[])
   size = ftell(list_file);
   fseek(list_file, 0, SEEK_SET);
   //size = 0;
-  
+
   list = malloc(size);
   fread(list, 1, size, list_file);
   fclose(list_file);
@@ -175,44 +165,26 @@ int main(int argc, char *argv[])
     SDL_Init(SDL_INIT_EVERYTHING);
     screen = SDL_SetVideoMode(1024, 512, 32, 0);
   }
-  
-  initialize_psx_gpu(psx_gpu, _vram);
 
 #ifdef NEON_BUILD
   system("ofbset -fb /dev/fb1 -mem 6291456 -en 0");
   u32 fbdev_handle = open("/dev/fb1", O_RDWR);
-  psx_gpu->vram_ptr = (mmap((void *)0x50000000, 1024 * 1024 * 2, PROT_READ | PROT_WRITE,
+  u16 *vram_ptr =
+  vram_ptr = (mmap((void *)0x50000000, 1024 * 1024 * 2, PROT_READ | PROT_WRITE,
    MAP_SHARED | 0xA0000000, fbdev_handle, 0));
-  psx_gpu->vram_ptr += 64;
-#endif
-
+  vram_ptr += 64;
 
+  initialize_psx_gpu(psx_gpu, vram_ptr + 64);
+#else
+  initialize_psx_gpu(psx_gpu, _vram + 64);
+#endif
 
 #ifdef NEON_BUILD
   //triangle_benchmark(psx_gpu);
   //return 0;
 #endif
 
-#ifdef FULL_COMPARE_MODE
-  psx_gpu->pixel_count_mode = 1; 
-  psx_gpu->pixel_compare_mode = 0;
   memcpy(psx_gpu->vram_ptr, state.vram, 1024 * 512 * 2);
-  //render_block_fill(psx_gpu, 0, 0, 0, 1024, 512);
-  gpu_parse(psx_gpu, list, size);
-
-  psx_gpu->pixel_count_mode = 0;
-  psx_gpu->pixel_compare_mode = 1;
-  memcpy(psx_gpu->compare_vram, state.vram, 1024 * 512 * 2); 
-  memcpy(psx_gpu->vram_ptr, state.vram, 1024 * 512 * 2);
-  //render_block_fill(psx_gpu, 0, 0, 0, 1024, 512);
-  clear_stats();
-  gpu_parse(psx_gpu, list, size);
-  flush_render_block_buffer(psx_gpu);
-#else
-  memcpy(psx_gpu->vram_ptr, state.vram, 1024 * 512 * 2);
-
-  psx_gpu->pixel_count_mode = 0;
-  psx_gpu->pixel_compare_mode = 0;
 
   clear_stats();
 
@@ -232,7 +204,7 @@ int main(int argc, char *argv[])
   gpu_parse(psx_gpu, list, size);
   flush_render_block_buffer(psx_gpu);
 
-  printf("%s: ", argv[1]);
+  printf("%-64s: ", argv[1]);
 #ifdef NEON_BUILD
   u32 cycles_elapsed = get_counter() - cycles;
 
@@ -265,17 +237,14 @@ int main(int argc, char *argv[])
     }
   }
 #endif
-#endif
 
 #if 0
   printf("\n");
-  printf("  %d pixels, %d pixel blocks (%d unaligned), %d spans\n"
-   "   (%lf pixels per block (%lf unaligned, r %lf), %lf pixels per span),\n"
+  printf("  %d pixels, %d pixel blocks, %d spans\n"
+   "   (%lf pixels per block, %lf pixels per span),\n"
    "   %lf blocks per span (%lf per non-zero span), %lf overdraw)\n\n",
-   span_pixels, span_pixel_blocks, span_pixel_blocks_unaligned, spans,
+   span_pixels, span_pixel_blocks, spans,
    (double)span_pixels / span_pixel_blocks,
-   (double)span_pixels / span_pixel_blocks_unaligned,
-   (double)span_pixel_blocks / span_pixel_blocks_unaligned,
    (double)span_pixels / spans,
    (double)span_pixel_blocks / spans, 
    (double)span_pixel_blocks / (spans - zero_block_spans),
@@ -283,10 +252,10 @@ int main(int argc, char *argv[])
    ((psx_gpu->viewport_end_x - psx_gpu->viewport_start_x) * 
    (psx_gpu->viewport_end_y - psx_gpu->viewport_start_y)));
 
-  printf("  %d triangles (%d false modulated)\n"
+  printf("  %d triangles\n"
    "   (%d trivial rejects, %lf%% flat, %lf%% left split, %lf%% clipped)\n"
    "   (%lf pixels per triangle, %lf rows per triangle)\n\n",
-   triangles, false_modulated_triangles, trivial_rejects,
+   triangles, trivial_rejects,
    percent_of(flat_triangles, triangles),
    percent_of(left_split_triangles, triangles),
    percent_of(clipped_triangles, triangles),
@@ -306,6 +275,8 @@ int main(int argc, char *argv[])
    percent_of(sprite_blocks, span_pixel_blocks));
   printf("   %7d blended blocks     (%lf%%)\n", blend_blocks,
    percent_of(blend_blocks, span_pixel_blocks));
+  printf("   %7d false-mod blocks   (%lf%%)\n", false_modulated_blocks,
+   percent_of(false_modulated_blocks, span_pixel_blocks));
   printf("\n");
   printf("  %lf blocks per render buffer flush\n", (double)span_pixel_blocks /
    render_buffer_flushes);
index f6143ee..fc9f3fb 100644 (file)
@@ -38,61 +38,53 @@ const u8 command_lengths[256] =
 
 void update_texture_ptr(psx_gpu_struct *psx_gpu)
 {
+  u8 *texture_base;
   u8 *texture_ptr;
 
   switch((psx_gpu->render_state_base >> 8) & 0x3)
   {
     default:
     case TEXTURE_MODE_4BPP:
-#ifdef TEXTURE_CACHE_4BPP
-      texture_ptr = psx_gpu->texture_4bpp_cache[psx_gpu->current_texture_page];
+      texture_base = psx_gpu->texture_4bpp_cache[psx_gpu->current_texture_page];
+
+      texture_ptr = texture_base;
       texture_ptr += psx_gpu->texture_window_x & 0xF;
       texture_ptr += (psx_gpu->texture_window_y & 0xF) << 4;
       texture_ptr += (psx_gpu->texture_window_x >> 4) << 8;
       texture_ptr += (psx_gpu->texture_window_y >> 4) << 12;
-#else
-      texture_ptr = (u8 *)(psx_gpu->vram_ptr);
-      texture_ptr += (psx_gpu->current_texture_page & 0xF) * 128;
-      texture_ptr += ((psx_gpu->current_texture_page >> 4) * 256) * 2048;
-      texture_ptr += psx_gpu->texture_window_x / 2;
-      texture_ptr += (psx_gpu->texture_window_y) * 2048;
-#endif
       break;
 
     case TEXTURE_MODE_8BPP:
-#ifdef TEXTURE_CACHE_8BPP
       if(psx_gpu->current_texture_page & 0x1)
       {
-        texture_ptr =
+        texture_base =
          psx_gpu->texture_8bpp_odd_cache[psx_gpu->current_texture_page >> 1];
       }
       else
       {
-        texture_ptr =
+        texture_base =
          psx_gpu->texture_8bpp_even_cache[psx_gpu->current_texture_page >> 1];
       }
       
+      texture_ptr = texture_base;
+      texture_ptr += psx_gpu->texture_window_x & 0xF;
       texture_ptr += (psx_gpu->texture_window_y & 0xF) << 4;
       texture_ptr += (psx_gpu->texture_window_x >> 4) << 8;
       texture_ptr += (psx_gpu->texture_window_y >> 4) << 12;
-#else
-      texture_ptr = (u8 *)(psx_gpu->vram_ptr);
-      texture_ptr += (psx_gpu->current_texture_page & 0xF) * 128;
-      texture_ptr += ((psx_gpu->current_texture_page >> 4) * 256) * 2048;
-      texture_ptr += psx_gpu->texture_window_x;
-      texture_ptr += (psx_gpu->texture_window_y) * 2048;
-#endif
       break;
 
     case TEXTURE_MODE_16BPP:
-      texture_ptr = (u8 *)(psx_gpu->vram_ptr);
-      texture_ptr += (psx_gpu->current_texture_page & 0xF) * 128;
-      texture_ptr += ((psx_gpu->current_texture_page >> 4) * 256) * 2048;
+      texture_base = (u8 *)(psx_gpu->vram_ptr);
+      texture_base += (psx_gpu->current_texture_page & 0xF) * 128;
+      texture_base += ((psx_gpu->current_texture_page >> 4) * 256) * 2048;
+
+      texture_ptr = texture_base;
       texture_ptr += psx_gpu->texture_window_x * 2;
       texture_ptr += (psx_gpu->texture_window_y) * 2048;
       break;
   }
 
+  psx_gpu->texture_page_base = texture_base;
   psx_gpu->texture_page_ptr = texture_ptr;  
 }
 
@@ -447,8 +439,6 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
         u32 width = list_s16[4] & 0x3FF;
         u32 height = list_s16[5] & 0x1FF;
 
-        psx_gpu->primitive_color = list[0] & 0xFFFFFF;
-
         render_sprite(psx_gpu, x, y, 0, 0, width, height, current_command, list[0]);
                        break;
       }
@@ -461,7 +451,6 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
         u32 width = list_s16[6] & 0x3FF;
         u32 height = list_s16[7] & 0x1FF;
 
-        psx_gpu->primitive_color = list[0] & 0xFFFFFF;
         set_clut(psx_gpu, list_s16[5]);
 
         render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, width, height,
@@ -477,8 +466,6 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
         s32 x = list_s16[2] + psx_gpu->offset_x;
         s32 y = list_s16[3] + psx_gpu->offset_y;
 
-        psx_gpu->primitive_color = list[0] & 0xFFFFFF;
-
         render_sprite(psx_gpu, x, y, 0, 0, 1, 1, current_command, list[0]);
                        break;
       }
@@ -491,8 +478,6 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
         s32 x = list_s16[2] + psx_gpu->offset_x;
         s32 y = list_s16[3] + psx_gpu->offset_y;
 
-        psx_gpu->primitive_color = list[0] & 0xFFFFFF;
-
         render_sprite(psx_gpu, x, y, 0, 0, 8, 8, current_command, list[0]);
                        break;
       }
@@ -506,7 +491,6 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
         s32 y = list_s16[3] + psx_gpu->offset_y;
         u32 uv = list_s16[4];
 
-        psx_gpu->primitive_color = list[0] & 0xFFFFFF;
         set_clut(psx_gpu, list_s16[5]);
 
         render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, 8, 8,
@@ -522,7 +506,6 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
         s32 x = list_s16[2] + psx_gpu->offset_x;
         s32 y = list_s16[3] + psx_gpu->offset_y;
 
-        psx_gpu->primitive_color = list[0] & 0xFFFFFF;
         render_sprite(psx_gpu, x, y, 0, 0, 16, 16, current_command, list[0]);
                        break;
       }
@@ -536,7 +519,6 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
         s32 y = list_s16[3] + psx_gpu->offset_y;
         u32 uv = list_s16[4];
 
-        psx_gpu->primitive_color = list[0] & 0xFFFFFF;
         set_clut(psx_gpu, list_s16[5]);
 
         render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, 16, 16,