don't cast between long and pointers for win64
[pcsx_rearmed.git] / plugins / gpu_neon / psx_gpu / psx_gpu.c
index 3de2ece..85cf89f 100644 (file)
 
 #include <stdio.h>
 #include <stdlib.h>
+#include <stdint.h>
 #include <string.h>
 
 #include "common.h"
+#ifndef NEON_BUILD
+#include "vector_ops.h"
+#endif
+#include "psx_gpu_simd.h"
+
+#if 0
+void dump_r_d(const char *name, void *dump);
+void dump_r_q(const char *name, void *dump);
+#define dumprd(n) dump_r_d(#n, n.e)
+#define dumprq(n) dump_r_q(#n, n.e)
+#endif
 
 u32 span_pixels = 0;
 u32 span_pixel_blocks = 0;
@@ -47,6 +59,8 @@ u32 zero_block_spans = 0;
 u32 texture_cache_loads = 0;
 u32 false_modulated_blocks = 0;
 
+#define stats_add(stat, count) // stat += count (counting disabled: this macro expands to nothing)
+
 /* double size for enhancement */
 u32 reciprocal_table[512 * 2];
 
@@ -298,9 +312,6 @@ void update_texture_cache_region(psx_gpu_struct *psx_gpu, u32 x1, u32 y1,
   }
 }
 
-void update_texture_8bpp_cache_slice(psx_gpu_struct *psx_gpu,
- u32 texture_page);
-
 #ifndef NEON_BUILD
 
 void update_texture_4bpp_cache(psx_gpu_struct *psx_gpu)
@@ -449,9 +460,6 @@ void update_texture_8bpp_cache(psx_gpu_struct *psx_gpu)
   }
 }
 
-void setup_blocks_shaded_untextured_undithered_unswizzled_indirect(
- psx_gpu_struct *psx_gpu);
-
 void flush_render_block_buffer(psx_gpu_struct *psx_gpu)
 {
   if((psx_gpu->render_mode & RENDER_INTERLACE_ENABLED) &&
@@ -515,9 +523,6 @@ void flush_render_block_buffer(psx_gpu_struct *psx_gpu)
 }
 
 
-void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a,
- vertex_struct *b, vertex_struct *c);
-
 #ifndef NEON_BUILD
 
 #define setup_gradient_calculation_input(set, vertex)                          \
@@ -767,11 +772,31 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a,
     printf("mismatch on %s %s: %x vs %x\n", #_a, #_b, _a, _b)                  \
 
 
+#ifndef NDEBUG /* Debug builds only: setup_spans_debug_check() validates the
+ * span_edge_data entry passed to it. It needs `psx_gpu` in scope at the
+ * expansion site. It computes the entry's index within
+ * psx_gpu->span_edge_data and deliberately stores through a null pointer
+ * (presumably to trap right at the failure site) when that index exceeds
+ * MAX_SPANS or, for entries below psx_gpu->num_spans, when num_blocks exceeds
+ * MAX_BLOCKS_PER_ROW or y is 2048 or more. The stored value (1, 2 or 3)
+ * identifies which check failed. With NDEBUG defined the macro expands to
+ * nothing. */
+#define setup_spans_debug_check(span_edge_data_element)                        \
+{                                                                              \
+  u32 _num_spans = &span_edge_data_element - psx_gpu->span_edge_data;          \
+  if (_num_spans > MAX_SPANS)                                                  \
+    *(volatile int *)0 = 1;                                                    \
+  if (_num_spans < psx_gpu->num_spans)                                         \
+  {                                                                            \
+    if(span_edge_data_element.num_blocks > MAX_BLOCKS_PER_ROW)                 \
+      *(volatile int *)0 = 2;                                                  \
+    if(span_edge_data_element.y >= 2048)                                       \
+      *(volatile int *)0 = 3;                                                  \
+  }                                                                            \
+}                                                                              \
+
+#else
+#define setup_spans_debug_check(span_edge_data_element)                        \
+
+#endif
+
 #define setup_spans_prologue_alternate_yes()                                   \
   vec_2x64s alternate_x;                                                       \
   vec_2x64s alternate_dx_dy;                                                   \
   vec_4x32s alternate_x_32;                                                    \
-  vec_2x32s alternate_x_16;                                                    \
+  vec_4x16u alternate_x_16;                                                    \
                                                                                \
   vec_4x16u alternate_select;                                                  \
   vec_4x16s y_mid_point;                                                       \
@@ -1070,6 +1095,7 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a,
     span_edge_data[i].num_blocks = left_right_x_16.high.e[i];                  \
     span_edge_data[i].right_mask = span_shift.e[i];                            \
     span_edge_data[i].y = y_x4.e[i];                                           \
+    setup_spans_debug_check(span_edge_data[i]);                                \
   }                                                                            \
                                                                                \
   span_edge_data += 4;                                                         \
@@ -1184,26 +1210,6 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a,
   setup_spans_up(index_##major, index_##minor, minor, yes)                     \
 
 
-void setup_spans_up_left(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
- vertex_struct *v_b, vertex_struct *v_c);
-void setup_spans_up_right(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
- vertex_struct *v_b, vertex_struct *v_c);
-void setup_spans_down_left(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
- vertex_struct *v_b, vertex_struct *v_c);
-void setup_spans_down_right(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
- vertex_struct *v_b, vertex_struct *v_c);
-void setup_spans_up_a(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
- vertex_struct *v_b, vertex_struct *v_c);
-void setup_spans_up_b(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
- vertex_struct *v_b, vertex_struct *v_c);
-void setup_spans_down_a(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
- vertex_struct *v_b, vertex_struct *v_c);
-void setup_spans_down_b(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
- vertex_struct *v_b, vertex_struct *v_c);
-void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
- vertex_struct *v_b, vertex_struct *v_c);
-
-
 #ifndef NEON_BUILD
 
 void setup_spans_up_left(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
@@ -1407,12 +1413,16 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
     y_x4.e[3] = y_a + 3;
     setup_spans_adjust_edges_alternate_no(index_left, index_right);
 
+    // FIXME: overflow corner case
+    if(psx_gpu->num_spans + height_minor_b == MAX_SPANS)
+      height_minor_b &= ~3;
+
     psx_gpu->num_spans += height_minor_b;
-    do
+    while(height_minor_b > 0)
     {
       setup_spans_set_x4(none, down, no);
       height_minor_b -= 4;
-    } while(height_minor_b > 0);
+    }
   }
 
   left_split_triangles++;
@@ -1842,7 +1852,7 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
   }                                                                            \
 
 #define setup_blocks_add_blocks_direct()                                       \
-  texel_blocks_untextured += span_num_blocks;                                  \
+  stats_add(texel_blocks_untextured, span_num_blocks);                         \
   span_pixel_blocks += span_num_blocks                                         \
 
 
@@ -1916,30 +1926,6 @@ void setup_blocks_##shading##_##texturing##_##dithering##_##sw##_##target(     \
   psx_gpu->num_blocks = num_blocks;                                            \
 }                                                                              \
 
-void setup_blocks_shaded_textured_dithered_unswizzled_indirect(psx_gpu_struct
- *psx_gpu);
-
-void setup_blocks_shaded_untextured_dithered_unswizzled_indirect(psx_gpu_struct
- *psx_gpu);
-void setup_blocks_shaded_untextured_undithered_unswizzled_indirect(
- psx_gpu_struct *psx_gpu);
-void setup_blocks_shaded_untextured_dithered_unswizzled_direct(psx_gpu_struct
- *psx_gpu);
-void setup_blocks_shaded_untextured_undithered_unswizzled_direct(
- psx_gpu_struct *psx_gpu);
-
-void setup_blocks_unshaded_textured_dithered_unswizzled_indirect(psx_gpu_struct
- *psx_gpu);
-void setup_blocks_unshaded_untextured_undithered_unswizzled_indirect(
- psx_gpu_struct *psx_gpu);
-void setup_blocks_unshaded_untextured_undithered_unswizzled_direct(
- psx_gpu_struct *psx_gpu);
-
-void setup_blocks_shaded_textured_dithered_swizzled_indirect(psx_gpu_struct
- *psx_gpu);
-void setup_blocks_unshaded_textured_dithered_swizzled_indirect(psx_gpu_struct
- *psx_gpu);
-
 
 //setup_blocks_builder(unshaded, untextured, undithered, unswizzled, direct);
 
@@ -1959,26 +1945,17 @@ setup_blocks_builder(shaded, untextured, dithered, unswizzled, direct);
 setup_blocks_builder(unshaded, untextured, undithered, unswizzled, indirect);
 setup_blocks_builder(unshaded, untextured, undithered, unswizzled, direct);
 
-#endif
-
-void texture_blocks_untextured(psx_gpu_struct *psx_gpu);
-void texture_blocks_4bpp(psx_gpu_struct *psx_gpu);
-void texture_blocks_8bpp(psx_gpu_struct *psx_gpu);
-void texture_blocks_16bpp(psx_gpu_struct *psx_gpu);
-
-#ifndef NEON_BUILD
-
 void texture_blocks_untextured(psx_gpu_struct *psx_gpu)
 {
   if(psx_gpu->primitive_type != PRIMITIVE_TYPE_SPRITE)
-    texel_blocks_untextured += psx_gpu->num_blocks;
+    stats_add(texel_blocks_untextured, psx_gpu->num_blocks);
 }
 
 void texture_blocks_4bpp(psx_gpu_struct *psx_gpu)
 {
   block_struct *block = psx_gpu->blocks;
   u32 num_blocks = psx_gpu->num_blocks;
-  texel_blocks_4bpp += num_blocks;
+  stats_add(texel_blocks_4bpp, num_blocks);
 
   vec_8x8u texels_low;
   vec_8x8u texels_high;
@@ -2030,7 +2007,7 @@ void texture_blocks_8bpp(psx_gpu_struct *psx_gpu)
   block_struct *block = psx_gpu->blocks;
   u32 num_blocks = psx_gpu->num_blocks;
 
-  texel_blocks_8bpp += num_blocks;
+  stats_add(texel_blocks_8bpp, num_blocks);
 
   if(psx_gpu->current_texture_mask & psx_gpu->dirty_textures_8bpp_mask)
     update_texture_8bpp_cache(psx_gpu);
@@ -2064,7 +2041,7 @@ void texture_blocks_16bpp(psx_gpu_struct *psx_gpu)
   block_struct *block = psx_gpu->blocks;
   u32 num_blocks = psx_gpu->num_blocks;
 
-  texel_blocks_16bpp += num_blocks;
+  stats_add(texel_blocks_16bpp, num_blocks);
 
   vec_8x16u texels;
 
@@ -2252,27 +2229,6 @@ void shade_blocks_##shading##_textured_modulated_##dithering##_##target(       \
   }                                                                            \
 }                                                                              \
 
-void shade_blocks_shaded_textured_modulated_dithered_direct(psx_gpu_struct
- *psx_gpu);
-void shade_blocks_shaded_textured_modulated_undithered_direct(psx_gpu_struct
- *psx_gpu);
-void shade_blocks_unshaded_textured_modulated_dithered_direct(psx_gpu_struct
- *psx_gpu);
-void shade_blocks_unshaded_textured_modulated_undithered_direct(psx_gpu_struct
- *psx_gpu);
-
-void shade_blocks_shaded_textured_modulated_dithered_indirect(psx_gpu_struct
- *psx_gpu);
-void shade_blocks_shaded_textured_modulated_undithered_indirect(psx_gpu_struct
- *psx_gpu);
-void shade_blocks_unshaded_textured_modulated_dithered_indirect(psx_gpu_struct
- *psx_gpu);
-void shade_blocks_unshaded_textured_modulated_undithered_indirect(psx_gpu_struct
- *psx_gpu);
-
-void shade_blocks_textured_unmodulated_indirect(psx_gpu_struct *psx_gpu);
-void shade_blocks_textured_unmodulated_direct(psx_gpu_struct *psx_gpu);
-
 #ifndef NEON_BUILD
 
 shade_blocks_textured_modulated_builder(shaded, dithered, direct);
@@ -2358,14 +2314,6 @@ void shade_blocks_textured_unmodulated_dithered_##target(psx_gpu_struct        \
 shade_blocks_textured_unmodulated_builder(indirect)
 shade_blocks_textured_unmodulated_builder(direct)
 
-#endif
-
-
-void shade_blocks_unshaded_untextured_indirect(psx_gpu_struct *psx_gpu);
-void shade_blocks_unshaded_untextured_direct(psx_gpu_struct *psx_gpu);
-
-#ifndef NEON_BUILD
-                                                                               
 void shade_blocks_unshaded_untextured_indirect(psx_gpu_struct *psx_gpu)
 {
 }
@@ -2577,27 +2525,6 @@ void                                                                           \
   }                                                                            \
 }                                                                              \
 
-void blend_blocks_textured_average_off(psx_gpu_struct *psx_gpu);
-void blend_blocks_textured_average_on(psx_gpu_struct *psx_gpu);
-void blend_blocks_textured_add_off(psx_gpu_struct *psx_gpu);
-void blend_blocks_textured_add_on(psx_gpu_struct *psx_gpu);
-void blend_blocks_textured_subtract_off(psx_gpu_struct *psx_gpu);
-void blend_blocks_textured_subtract_on(psx_gpu_struct *psx_gpu);
-void blend_blocks_textured_add_fourth_off(psx_gpu_struct *psx_gpu);
-void blend_blocks_textured_add_fourth_on(psx_gpu_struct *psx_gpu);
-
-void blend_blocks_untextured_average_off(psx_gpu_struct *psx_gpu);
-void blend_blocks_untextured_average_on(psx_gpu_struct *psx_gpu);
-void blend_blocks_untextured_add_off(psx_gpu_struct *psx_gpu);
-void blend_blocks_untextured_add_on(psx_gpu_struct *psx_gpu);
-void blend_blocks_untextured_subtract_off(psx_gpu_struct *psx_gpu);
-void blend_blocks_untextured_subtract_on(psx_gpu_struct *psx_gpu);
-void blend_blocks_untextured_add_fourth_off(psx_gpu_struct *psx_gpu);
-void blend_blocks_untextured_add_fourth_on(psx_gpu_struct *psx_gpu);
-
-void blend_blocks_textured_unblended_off(psx_gpu_struct *psx_gpu);
-void blend_blocks_textured_unblended_on(psx_gpu_struct *psx_gpu);
-
 #ifndef NEON_BUILD
 
 void blend_blocks_textured_unblended_off(psx_gpu_struct *psx_gpu)
@@ -2860,7 +2787,7 @@ blend_blocks_builder(textured, unblended, on);
   render_blocks_switch_block_texture_mode(4bpp),                               \
   render_blocks_switch_block_texture_mode(8bpp),                               \
   render_blocks_switch_block_texture_mode(16bpp),                              \
-  render_blocks_switch_block_texture_mode(4bpp)                                \
+  render_blocks_switch_block_texture_mode(16bpp)                               \
 
 
 render_block_handler_struct render_triangle_block_handlers[] =
@@ -3150,10 +3077,7 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
     render_triangle_p(psx_gpu, vertex_ptrs, flags);
 }
 
-
-void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu);
-
-#ifndef NEON_BUILD
+#if !defined(NEON_BUILD) || defined(SIMD_BUILD)
 
 void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu)
 {
@@ -3206,7 +3130,7 @@ void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu)
 
 
 #define setup_sprite_tile_fetch_texel_block_8bpp(offset)                       \
-  texture_block_ptr = psx_gpu->texture_page_ptr +                              \
+  texture_block_ptr = (u8 *)psx_gpu->texture_page_ptr +                        \
    ((texture_offset + offset) & texture_mask);                                 \
                                                                                \
   load_64b(texels, texture_block_ptr)                                          \
@@ -3314,7 +3238,7 @@ void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu)
 
 #define setup_sprite_tile_half_8bpp(edge)                                      \
 {                                                                              \
-  setup_sprite_tile_add_blocks(sub_tile_height * 2);                           \
+  setup_sprite_tile_add_blocks(sub_tile_height);                               \
                                                                                \
   while(sub_tile_height)                                                       \
   {                                                                            \
@@ -3768,7 +3692,7 @@ void setup_sprite_##texture_mode##x4mode(psx_gpu_struct *psx_gpu, s32 x, s32 y,\
   u32 num_blocks = psx_gpu->num_blocks;                                        \
   block_struct *block = psx_gpu->blocks + num_blocks;                          \
                                                                                \
-  u16 *texture_block_ptr;                                                      \
+  u8 *texture_block_ptr;                                                       \
   vec_8x8u texels;                                                             \
                                                                                \
   setup_sprite_tiled_initialize_##texture_mode##x4mode();                      \
@@ -3855,26 +3779,15 @@ void setup_sprite_##texture_mode##x4mode(psx_gpu_struct *psx_gpu, s32 x, s32 y,\
   }                                                                            \
 }                                                                              \
 
-void setup_sprite_4bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
- s32 width, s32 height, u32 color);
-void setup_sprite_8bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
- s32 width, s32 height, u32 color);
-void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
- s32 width, s32 height, u32 color);
-
-void setup_sprite_4bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
- s32 width, s32 height, u32 color);
-void setup_sprite_8bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
- s32 width, s32 height, u32 color);
-void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
- s32 width, s32 height, u32 color);
-
 #ifndef NEON_BUILD
 setup_sprite_tiled_builder(4bpp,);
 setup_sprite_tiled_builder(8bpp,);
 
 setup_sprite_tiled_builder(4bpp,_4x);
 setup_sprite_tiled_builder(8bpp,_4x);
+#endif
+
+#if !defined(NEON_BUILD) || defined(SIMD_BUILD)
 
 void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
  s32 v, s32 width, s32 height, u32 color)
@@ -3903,7 +3816,7 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
 
   texture_offset_base &= ~0x7;
 
-  sprites_16bpp++;
+  stats_add(sprites_16bpp, 1);
 
   if(block_width == 1)
   {
@@ -3924,7 +3837,7 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
       texture_block_ptr =
        texture_page_ptr + (texture_offset_base & texture_mask);
 
-      load_128b(block->texels, texture_block_ptr);
+      block->texels = *(vec_8x16u *)texture_block_ptr;
       block->draw_mask_bits = mask_bits;
       block->fb_ptr = fb_ptr;
 
@@ -3958,7 +3871,7 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
       texture_offset_base += 1024;
 
       texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask);
-      load_128b(block->texels, texture_block_ptr);
+      block->texels = *(vec_8x16u *)texture_block_ptr;
 
       block->draw_mask_bits = left_mask_bits;
       block->fb_ptr = fb_ptr;
@@ -3970,7 +3883,7 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
       while(blocks_remaining)
       {
         texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask);
-        load_128b(block->texels, texture_block_ptr);
+        block->texels = *(vec_8x16u *)texture_block_ptr;
 
         block->draw_mask_bits = 0;
         block->fb_ptr = fb_ptr;
@@ -3983,7 +3896,7 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
       }
 
       texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask);
-      load_128b(block->texels, texture_block_ptr);
+      block->texels = *(vec_8x16u *)texture_block_ptr;
 
       block->draw_mask_bits = right_mask_bits;
       block->fb_ptr = fb_ptr;
@@ -3999,9 +3912,19 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
 
 #endif
 
+#ifndef NEON_BUILD
+
 void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
  s32 v, s32 width, s32 height, u32 color)
 {
+  if((psx_gpu->render_state & (RENDER_STATE_MASK_EVALUATE |
+   RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND)) == 0 &&
+   (psx_gpu->render_mode & RENDER_INTERLACE_ENABLED) == 0)
+  {
+    setup_sprite_untextured_simple(psx_gpu, x, y, u, v, width, height, color);
+    return;
+  }
+
   u32 right_width = ((width - 1) & 0x7) + 1;
   u32 right_mask_bits = (0xFF << right_width);
   u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + x;
@@ -4067,6 +3990,66 @@ void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
   }
 }
 
+#endif
+
+// Fill a width x height rectangle with one solid color by storing straight
+// into the output VRAM, without going through the render block buffer.
+//
+//   psx_gpu        GPU state; reads vram_out_ptr, mask_msb and num_blocks.
+//   x, y           Top-left destination pixel. Rows are 1024 u16 pixels apart.
+//   u, v           Not used by this routine.
+//   width, height  Rectangle size in pixels.
+//                  NOTE(review): assumes width >= 1 and height >= 0; the
+//                  per-row counter is unsigned and would wrap otherwise.
+//                  Confirm that callers guarantee this.
+//   color          0x00BBGGRR; each channel is reduced to its top 5 bits.
+void setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y,
+ s32 u, s32 v, s32 width, s32 height, u32 color)
+{
+  u32 r = color & 0xFF;
+  u32 g = (color >> 8) & 0xFF;
+  u32 b = (color >> 16) & 0xFF;
+  // 5:5:5 pixel with the current mask bit OR-ed in.
+  u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10) |
+   psx_gpu->mask_msb;
+  // Same pixel in both halves so one 32-bit store writes two pixels.
+  u32 color_32bpp = color_16bpp | (color_16bpp << 16);
+
+  u16 *vram_ptr16 = psx_gpu->vram_out_ptr + x + (y * 1024);
+  u32 *vram_ptr;
+
+  u32 num_width;
+
+  // The stores below hit VRAM immediately, so any blocks that are still
+  // queued have to be flushed first (presumably flush_render_block_buffer
+  // renders them); otherwise they would be drawn after, and on top of, this
+  // fill. Flushing only when the buffer is over-full would skip the flush in
+  // the common case where a few blocks are pending.
+  if(psx_gpu->num_blocks)
+  {
+    flush_render_block_buffer(psx_gpu);
+  }
+
+  while(height)
+  {
+    num_width = width;
+
+    vram_ptr = (void *)vram_ptr16;
+    // Row start not 4-byte aligned: write one 16-bit pixel so that the
+    // 32-bit stores that follow are aligned.
+    if((uintptr_t)vram_ptr16 & 2)
+    {
+      *vram_ptr16 = color_32bpp;
+      vram_ptr = (void *)(vram_ptr16 + 1);
+      num_width--;
+    }
+
+    // Eight pixels (four 32-bit stores) per iteration.
+    while(num_width >= 4 * 2)
+    {
+      vram_ptr[0] = color_32bpp;
+      vram_ptr[1] = color_32bpp;
+      vram_ptr[2] = color_32bpp;
+      vram_ptr[3] = color_32bpp;
+
+      vram_ptr += 4;
+      num_width -= 4 * 2;
+    }
+
+    // Remaining pixel pairs.
+    while(num_width >= 2)
+    {
+      *vram_ptr++ = color_32bpp;
+      num_width -= 2;
+    }
+
+    // Odd trailing pixel.
+    if(num_width > 0)
+    {
+      *(u16 *)vram_ptr = color_32bpp;
+    }
+
+    vram_ptr16 += 1024;
+    height--;
+  }
+}
 
 
 #define setup_sprite_blocks_switch_textured(texture_mode)                      \
@@ -4155,7 +4138,7 @@ void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
   render_sprite_blocks_switch_block_texture_mode(4bpp),                        \
   render_sprite_blocks_switch_block_texture_mode(8bpp),                        \
   render_sprite_blocks_switch_block_texture_mode(16bpp),                       \
-  render_sprite_blocks_switch_block_texture_mode(4bpp)                         \
+  render_sprite_blocks_switch_block_texture_mode(16bpp)                        \
 
 
 render_block_handler_struct render_sprite_block_handlers[] =
@@ -4248,7 +4231,7 @@ void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v,
 }
 
 #define draw_pixel_line_mask_evaluate_yes()                                    \
-  if(*vram_ptr & 0x8000)                                                       \
+  if((*vram_ptr & 0x8000) == 0)                                                \
 
 #define draw_pixel_line_mask_evaluate_no()                                     \
     
@@ -4906,10 +4889,10 @@ void initialize_reciprocal_table(void)
   {
     shift = __builtin_clz(height);
     height_normalized = height << shift;
-    height_reciprocal = ((1ULL << 52) + (height_normalized - 1)) /
+    height_reciprocal = ((1ULL << 51) + (height_normalized - 1)) /
      height_normalized;
 
-    shift = 32 - (52 - shift);
+    shift = 32 - (51 - shift);
 
     reciprocal_table[height] = (height_reciprocal << 10) | shift;
   }
@@ -4960,6 +4943,7 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram)
   memset(psx_gpu->vram_ptr, 0, sizeof(u16) * 1024 * 512);
 
   initialize_reciprocal_table();
+  psx_gpu->reciprocal_table_ptr = reciprocal_table;
 
   //    00 01 10 11
   // 00  0  4  1  5
@@ -4990,7 +4974,7 @@ u64 get_us(void)
   return (tv.tv_sec * 1000000ULL) + tv.tv_usec;
 }
 
-#ifdef NEON_BUILD
+#if 0 //def NEON_BUILD
 
 u32 get_counter()
 {