psx_gpu: workaround overflow crash
[pcsx_rearmed.git] / plugins / gpu_neon / psx_gpu / psx_gpu.c
index 3de2ece..2cba878 100644 (file)
@@ -767,6 +767,26 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a,
     printf("mismatch on %s %s: %x vs %x\n", #_a, #_b, _a, _b)                  \
 
 
+#ifndef NDEBUG
+#define setup_spans_debug_check(span_edge_data_element)                        \
+{                                                                              \
+  u32 _num_spans = &span_edge_data_element - psx_gpu->span_edge_data;          \
+  if (_num_spans > MAX_SPANS)                                                  \
+    *(int *)0 = 1;                                                             \
+  if (_num_spans < psx_gpu->num_spans)                                         \
+  {                                                                            \
+    if(span_edge_data_element.num_blocks > MAX_BLOCKS_PER_ROW)                 \
+      *(int *)0 = 1;                                                           \
+    if(span_edge_data_element.y > 2048)                                        \
+      *(int *)0 = 1;                                                           \
+  }                                                                            \
+}                                                                              \
+
+#else
+#define setup_spans_debug_check(span_edge_data_element)                        \
+
+#endif
+
 #define setup_spans_prologue_alternate_yes()                                   \
   vec_2x64s alternate_x;                                                       \
   vec_2x64s alternate_dx_dy;                                                   \
@@ -1070,6 +1090,7 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a,
     span_edge_data[i].num_blocks = left_right_x_16.high.e[i];                  \
     span_edge_data[i].right_mask = span_shift.e[i];                            \
     span_edge_data[i].y = y_x4.e[i];                                           \
+    setup_spans_debug_check(span_edge_data[i]);                                \
   }                                                                            \
                                                                                \
   span_edge_data += 4;                                                         \
@@ -1407,12 +1428,16 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
     y_x4.e[3] = y_a + 3;
     setup_spans_adjust_edges_alternate_no(index_left, index_right);
 
+    // FIXME: overflow corner case
+    if(psx_gpu->num_spans + height_minor_b == MAX_SPANS)
+      height_minor_b &= ~3;
+
     psx_gpu->num_spans += height_minor_b;
-    do
+    while(height_minor_b > 0)
     {
       setup_spans_set_x4(none, down, no);
       height_minor_b -= 4;
-    } while(height_minor_b > 0);
+    }
   }
 
   left_split_triangles++;
@@ -3869,6 +3894,11 @@ void setup_sprite_8bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
 void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
  s32 width, s32 height, u32 color);
 
+void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
+ s32 v, s32 width, s32 height, u32 color);
+void setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y,
+ s32 u, s32 v, s32 width, s32 height, u32 color);
+
 #ifndef NEON_BUILD
 setup_sprite_tiled_builder(4bpp,);
 setup_sprite_tiled_builder(8bpp,);
@@ -3997,11 +4027,16 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
   }
 }
 
-#endif
-
 void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
  s32 v, s32 width, s32 height, u32 color)
 {
+  if((psx_gpu->render_state & (RENDER_STATE_MASK_EVALUATE |
+   RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND)) == 0)
+  {
+    setup_sprite_untextured_simple(psx_gpu, x, y, u, v, width, height, color);
+    return;
+  }
+
   u32 right_width = ((width - 1) & 0x7) + 1;
   u32 right_mask_bits = (0xFF << right_width);
   u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + x;
@@ -4067,6 +4102,66 @@ void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
   }
 }
 
+#endif
+
+void setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y,
+ s32 u, s32 v, s32 width, s32 height, u32 color)
+{
+  u32 r = color & 0xFF;
+  u32 g = (color >> 8) & 0xFF;
+  u32 b = (color >> 16) & 0xFF;
+  u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10) |
+   psx_gpu->mask_msb;
+  u32 color_32bpp = color_16bpp | (color_16bpp << 16);
+
+  u16 *vram_ptr16 = psx_gpu->vram_out_ptr + x + (y * 1024);
+  u32 *vram_ptr;
+
+  u32 num_width;
+
+  if(psx_gpu->num_blocks > MAX_BLOCKS)
+  {
+    flush_render_block_buffer(psx_gpu);
+  }
+
+  while(height)
+  {
+    num_width = width;
+
+    vram_ptr = (void *)vram_ptr16;
+    if((long)vram_ptr16 & 2)
+    {
+      *vram_ptr16 = color_32bpp;
+      vram_ptr = (void *)(vram_ptr16 + 1);
+      num_width--;
+    }
+
+    while(num_width >= 4 * 2)
+    {
+      vram_ptr[0] = color_32bpp;
+      vram_ptr[1] = color_32bpp;
+      vram_ptr[2] = color_32bpp;
+      vram_ptr[3] = color_32bpp;
+
+      vram_ptr += 4;
+      num_width -= 4 * 2;
+    }
+
+    while(num_width >= 2)
+    {
+      *vram_ptr++ = color_32bpp;
+      num_width -= 2;
+    }
+
+    if(num_width > 0)
+    {
+      *(u16 *)vram_ptr = color_32bpp;
+    }
+
+    vram_ptr16 += 1024;
+    height--;
+  }
+}
 
 
 #define setup_sprite_blocks_switch_textured(texture_mode)                      \
@@ -4906,10 +5001,10 @@ void initialize_reciprocal_table(void)
   {
     shift = __builtin_clz(height);
     height_normalized = height << shift;
-    height_reciprocal = ((1ULL << 52) + (height_normalized - 1)) /
+    height_reciprocal = ((1ULL << 51) + (height_normalized - 1)) /
      height_normalized;
 
-    shift = 32 - (52 - shift);
+    shift = 32 - (51 - shift);
 
     reciprocal_table[height] = (height_reciprocal << 10) | shift;
   }