frontend: update libpicofe, fix missed callbacks
[pcsx_rearmed.git] / plugins / gpu_neon / psx_gpu / psx_gpu.c
index af24e77..4e8d3d3 100644 (file)
@@ -15,6 +15,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
+#include <stddef.h>
 #include <string.h>
 #include <assert.h>
 
@@ -23,6 +24,7 @@
 #include "vector_ops.h"
 #endif
 #include "psx_gpu_simd.h"
+#include "psx_gpu_offsets.h"
 
 #if 0
 void dump_r_d(const char *name, void *dump);
@@ -224,9 +226,9 @@ u32 texture_region_mask(s32 x1, s32 y1, s32 x2, s32 y2)
 
   coverage_x = x1 >> 6;
 
-  mask_up_left = 0xFFFF0000 << coverage_x;
-  if(coverage_x < 0)
-    mask_up_left = 0xFFFF0000;
+  mask_up_left = 0xFFFF0000;
+  if(coverage_x > 0)
+    mask_up_left <<= coverage_x;
 
   coverage_y = y1 >> 8;
   if(coverage_y <= 0)
@@ -526,6 +528,11 @@ void flush_render_block_buffer(psx_gpu_struct *psx_gpu)
     render_block_handler_struct *render_block_handler =
      psx_gpu->render_block_handler;
 
+#if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD)
+    // the asm doesn't bother to save callee-save vector regs, so do it here
+    __asm__ __volatile__("":::"q4","q5","q6","q7");
+#endif
+
     render_block_handler->texture_blocks(psx_gpu);
     render_block_handler->shade_blocks(psx_gpu);
     render_block_handler->blend_blocks(psx_gpu);
@@ -536,6 +543,9 @@ void flush_render_block_buffer(psx_gpu_struct *psx_gpu)
 #endif
 
     psx_gpu->num_blocks = 0;
+#if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD)
+    __asm__ __volatile__("":::"q4","q5","q6","q7");
+#endif
   }
 }
 
@@ -558,8 +568,9 @@ void flush_render_block_buffer(psx_gpu_struct *psx_gpu)
   y##set##_b.e[1] = vertex->b                                                  \
   
 
-void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a,
- vertex_struct *b, vertex_struct *c)
+void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu,
+ const vertex_struct * __restrict__ a, const vertex_struct * __restrict__ b,
+ const vertex_struct * __restrict__ c)
 {
   u32 triangle_area = psx_gpu->triangle_area;
   u32 winding_mask_scalar;
@@ -1161,6 +1172,8 @@ static void setup_spans_debug_check(psx_gpu_struct *psx_gpu,
       setup_spans_set_x4(alternate, down, alternate_active);                   \
       height -= 4;                                                             \
     } while(height > 0);                                                       \
+    if (psx_gpu->hacks_active & (AHACK_TEXTURE_ADJ_U | AHACK_TEXTURE_ADJ_V))   \
+      span_uvrg_offset[height - 1].low = span_uvrg_offset[height - 2].low;     \
   }                                                                            \
 
 
@@ -1214,6 +1227,8 @@ static void setup_spans_debug_check(psx_gpu_struct *psx_gpu,
       setup_spans_set_x4(alternate, up, alternate_active);                     \
       height -= 4;                                                             \
     }                                                                          \
+    if (psx_gpu->hacks_active & AHACK_TEXTURE_ADJ_V)                           \
+      psx_gpu->span_uvrg_offset[0].low = psx_gpu->span_uvrg_offset[1].low;     \
   }                                                                            \
 
 #define index_left  0
@@ -1450,6 +1465,11 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
       setup_spans_set_x4(none, down, no);
       height_minor_b -= 4;
     }
+    if (psx_gpu->hacks_active & (AHACK_TEXTURE_ADJ_U | AHACK_TEXTURE_ADJ_V))
+    {
+      span_uvrg_offset[height_minor_b - 1].low =
+        span_uvrg_offset[height_minor_b - 2].low;
+    }
   }
 
   left_split_triangles++;
@@ -1457,6 +1477,41 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
 
 #endif
 
+// this is some hacky mess, can this be improved somehow?
+// ideally change things to not have to do this hack at all
+void __attribute__((noinline))
+setup_blocks_uv_adj_hack(psx_gpu_struct *psx_gpu, block_struct *block,
+    edge_data_struct *span_edge_data, vec_4x32u *span_uvrg_offset)
+{
+  size_t span_i = span_uvrg_offset - psx_gpu->span_uvrg_offset;
+  if (span_i != 0 && span_i != psx_gpu->num_spans - 1
+      && !(psx_gpu->hacks_active & AHACK_TEXTURE_ADJ_U))
+    return;
+  u32 num_blocks = span_edge_data->num_blocks - 1;
+  s32 offset = __builtin_ctz(span_edge_data->right_mask | 0x100) - 1;
+  s32 toffset = 8 * num_blocks + offset - 1;
+  if (toffset < 0 && !(psx_gpu->hacks_active & AHACK_TEXTURE_ADJ_U))
+    return;
+
+  toffset += span_edge_data->left_x;
+  s32 u_dx = psx_gpu->uvrg_dx.low.e[0];
+  s32 v_dx = psx_gpu->uvrg_dx.low.e[1];
+  u32 u = span_uvrg_offset->low.e[0];
+  u32 v = span_uvrg_offset->low.e[1];
+  u += u_dx * toffset;
+  v += v_dx * toffset;
+  u = (u >> 16) & psx_gpu->texture_mask_width;
+  v = (v >> 16) & psx_gpu->texture_mask_height;
+  if (!(psx_gpu->render_state_base & (TEXTURE_MODE_16BPP << 8))) {
+    // 4bpp 8bpp are swizzled
+    u32 u_ = u;
+    u = (u & 0x0f) | ((v & 0x0f) << 4);
+    v = (v & 0xf0) | (u_ >> 4);
+  }
+  assert(offset >= 0);
+  //assert(block->uv.e[offset] == ((v << 8) | u));
+  block->uv.e[offset] = (v << 8) | u;
+}
 
 #define dither_table_entry_normal(value)                                       \
   (value)                                                                      \
@@ -1561,7 +1616,7 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
   vec_8x8s dither_offsets_short;                                               \
                                                                                \
   dither_row =                                                                 \
-   (dither_row >> dither_shift) | (dither_row << (32 - dither_shift));         \
+   (dither_row >> dither_shift) | ((u64)dither_row << (32 - dither_shift));    \
   dup_2x32b(vector_cast(vec_2x32u, dither_offsets_short), dither_row);         \
   setup_blocks_span_initialize_dithered_##texturing()                          \
 
@@ -1866,6 +1921,14 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
 
 #define setup_blocks_store_draw_mask_untextured_direct(_block, bits)           \
 
+#define setup_blocks_uv_adj_hack_untextured(_block, edge_data, uvrg_offset)    \
+
+#define setup_blocks_uv_adj_hack_textured(_block, edge_data, uvrg_offset)      \
+{                                                                              \
+  u32 m_ = AHACK_TEXTURE_ADJ_U | AHACK_TEXTURE_ADJ_V;                          \
+  if (unlikely(psx_gpu->hacks_active & m_))                                    \
+    setup_blocks_uv_adj_hack(psx_gpu, _block, edge_data, uvrg_offset);         \
+}                                                                              \
 
 #define setup_blocks_add_blocks_indirect()                                     \
   num_blocks += span_num_blocks;                                               \
@@ -1880,7 +1943,7 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
 
 #define setup_blocks_add_blocks_direct()                                       \
   stats_add(texel_blocks_untextured, span_num_blocks);                         \
-  span_pixel_blocks += span_num_blocks                                         \
+  stats_add(span_pixel_blocks, span_num_blocks);                               \
 
 
 #define setup_blocks_builder(shading, texturing, dithering, sw, target)        \
@@ -1936,6 +1999,8 @@ void setup_blocks_##shading##_##texturing##_##dithering##_##sw##_##target(     \
       setup_blocks_store_##shading##_##texturing(sw, dithering, target, edge); \
       setup_blocks_store_draw_mask_##texturing##_##target(block,               \
        span_edge_data->right_mask);                                            \
+      setup_blocks_uv_adj_hack_##texturing(block, span_edge_data,              \
+          span_uvrg_offset);                                                   \
                                                                                \
       block++;                                                                 \
     }                                                                          \
@@ -2861,9 +2926,9 @@ char *render_block_flag_strings[] =
    (triangle_winding_##winding << 6))                                          \
 
 static int prepare_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
vertex_struct *vertexes_out[3])
prepared_triangle *triangle_out)
 {
-  s32 y_top, y_bottom;
+  s32 y_top, y_bottom, offset_x, offset_y, i;
   s32 triangle_area;
   u32 triangle_winding = 0;
 
@@ -2898,6 +2963,7 @@ static int prepare_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
 
   y_bottom = c->y;
   y_top = a->y;
+  offset_y = sign_extend_11bit(y_top + psx_gpu->offset_y) - y_top;
 
   if((y_bottom - y_top) >= 512)
   {
@@ -2925,7 +2991,7 @@ static int prepare_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
       vertex_swap(a, b);
   }
 
-  if((c->x - psx_gpu->offset_x) >= 1024 || (c->x - a->x) >= 1024)
+  if(c->x - a->x >= 1024)
   {
 #ifdef PROFILE
     trivial_rejects++;
@@ -2933,8 +2999,10 @@ static int prepare_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
     return 0;
   }
 
-  if(invalidate_texture_cache_region_viewport(psx_gpu, a->x, y_top, c->x,
-   y_bottom) == 0)
+  offset_x = sign_extend_11bit(a->x + psx_gpu->offset_x) - a->x;
+  if(invalidate_texture_cache_region_viewport(psx_gpu,
+      a->x + offset_x, y_top + offset_y,
+      c->x + offset_x, y_bottom + offset_y) == 0)
   {
 #ifdef PROFILE
     trivial_rejects++;
@@ -2942,12 +3010,20 @@ static int prepare_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
     return 0;
   }
 
+  for (i = 0; i < 3; i++)
+  {
+    vertexes[i].x += offset_x;
+    vertexes[i].y += offset_y;
+  }
+
   psx_gpu->triangle_area = triangle_area;
   psx_gpu->triangle_winding = triangle_winding;
 
-  vertexes_out[0] = a;
-  vertexes_out[1] = b;
-  vertexes_out[2] = c;
+  triangle_out->vertexes[0] = a;
+  triangle_out->vertexes[1] = b;
+  triangle_out->vertexes[2] = c;
+  triangle_out->offset_x = offset_x;
+  triangle_out->offset_y = offset_y;
 
   return 1;
 }
@@ -2969,6 +3045,11 @@ static void render_triangle_p(psx_gpu_struct *psx_gpu,
   triangle_set_direction(y_direction_b, y_delta_b);
   triangle_set_direction(y_direction_c, y_delta_c);
 
+#if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD)
+  // the asm doesn't bother to save callee-save vector regs, so do it here
+  __asm__ __volatile__("vstmia %0, {q4-q7}" :: "r"(psx_gpu->saved_q4_q7) : "memory");
+#endif
+
   compute_all_gradients(psx_gpu, a, b, c);
 
   switch(y_direction_a | (y_direction_b << 2) | (y_direction_c << 4) |
@@ -3095,14 +3176,18 @@ static void render_triangle_p(psx_gpu_struct *psx_gpu,
    &(render_triangle_block_handlers[render_state]);
   ((setup_blocks_function_type *)psx_gpu->render_block_handler->setup_blocks)
    (psx_gpu);
+
+#if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD)
+  __asm__ __volatile__("vldmia %0, {q4-q7}" :: "r"(psx_gpu->saved_q4_q7));
+#endif
 }
 
 void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
  u32 flags)
 {
-  vertex_struct *vertex_ptrs[3];
-  if (prepare_triangle(psx_gpu, vertexes, vertex_ptrs))
-    render_triangle_p(psx_gpu, vertex_ptrs, flags);
+  prepared_triangle triangle;
+  if (prepare_triangle(psx_gpu, vertexes, &triangle))
+    render_triangle_p(psx_gpu, triangle.vertexes, flags);
 }
 
 #if !defined(NEON_BUILD) || defined(SIMD_BUILD)
@@ -4192,10 +4277,10 @@ render_block_handler_struct render_sprite_block_handlers[] =
 
 
 void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v,
- s32 width, s32 height, u32 flags, u32 color)
+ s32 *width, s32 *height, u32 flags, u32 color)
 {
-  s32 x_right = x + width - 1;
-  s32 y_bottom = y + height - 1;
+  s32 x_right = x + *width - 1;
+  s32 y_bottom = y + *height - 1;
 
 #ifdef PROFILE
   sprites++;
@@ -4204,6 +4289,7 @@ void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v,
   if(invalidate_texture_cache_region_viewport(psx_gpu, x, y, x_right,
    y_bottom) == 0)
   {
+    *width = *height = 0;
     return;
   }
 
@@ -4212,7 +4298,7 @@ void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v,
     u32 clip = psx_gpu->viewport_start_x - x;
     x += clip;
     u += clip;
-    width -= clip;
+    *width -= clip;
   }
 
   if(y < psx_gpu->viewport_start_y)
@@ -4220,21 +4306,24 @@ void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v,
     s32 clip = psx_gpu->viewport_start_y - y;
     y += clip;
     v += clip;
-    height -= clip;
+    *height -= clip;
   }
 
   if(x_right > psx_gpu->viewport_end_x)
-    width -= x_right - psx_gpu->viewport_end_x;
+    *width -= x_right - psx_gpu->viewport_end_x;
 
   if(y_bottom > psx_gpu->viewport_end_y)
-    height -= y_bottom - psx_gpu->viewport_end_y;
+    *height -= y_bottom - psx_gpu->viewport_end_y;
 
-  if((width <= 0) || (height <= 0))
+  if((*width <= 0) || (*height <= 0))
+  {
+    *width = *height = 0;
     return;
+  }
 
 #ifdef PROFILE
-  span_pixels += width * height;
-  spans += height;
+  span_pixels += *width * *height;
+  spans += *height;
 #endif
 
   u32 render_state = flags &
@@ -4271,7 +4360,7 @@ void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v,
   psx_gpu->render_block_handler = render_block_handler;
 
   ((setup_sprite_function_type *)render_block_handler->setup_blocks)
-   (psx_gpu, x, y, u, v, width, height, color);
+   (psx_gpu, x, y, u, v, *width, *height, color);
 }
 
 #define draw_pixel_line_mask_evaluate_yes()                                    \
@@ -4582,6 +4671,7 @@ void render_line(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 flags,
   if(vertex_a->x >= vertex_b->x)
   {
     vertex_swap(vertex_a, vertex_b);
+    (void)triangle_winding;
   }
 
   x_a = vertex_a->x;
@@ -4803,8 +4893,7 @@ void render_block_fill(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y,
   u32 r = color & 0xFF;
   u32 g = (color >> 8) & 0xFF;
   u32 b = (color >> 16) & 0xFF;
-  u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10) |
-   psx_gpu->mask_msb;
+  u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10);
   u32 color_32bpp = color_16bpp | (color_16bpp << 16);
 
   u32 *vram_ptr = (u32 *)(psx_gpu->vram_out_ptr + x + (y * 1024));
@@ -4856,8 +4945,7 @@ void render_block_fill_enh(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y,
   u32 r = color & 0xFF;
   u32 g = (color >> 8) & 0xFF;
   u32 b = (color >> 16) & 0xFF;
-  u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10) |
-   psx_gpu->mask_msb;
+  u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10);
   u32 color_32bpp = color_16bpp | (color_16bpp << 16);
 
   u32 *vram_ptr = (u32 *)(psx_gpu->vram_out_ptr + x + (y * 1024));
@@ -4888,6 +4976,7 @@ void render_block_fill_enh(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y,
   }
 }
 
+#ifndef PCSX
 void render_block_copy(psx_gpu_struct *psx_gpu, u16 *source, u32 x, u32 y,
  u32 width, u32 height, u32 pitch)
 {
@@ -4919,7 +5008,7 @@ void render_block_move(psx_gpu_struct *psx_gpu, u32 source_x, u32 source_y,
   render_block_copy(psx_gpu, psx_gpu->vram_ptr + source_x + (source_y * 1024),
    dest_x, dest_y, width, height, 1024);
 }
-
+#endif
 
 void initialize_reciprocal_table(void)
 {
@@ -4944,7 +5033,7 @@ void initialize_reciprocal_table(void)
 
 
 #define dither_table_row(a, b, c, d)                                           \
- ((a & 0xFF) | ((b & 0xFF) << 8) | ((c & 0xFF) << 16) | ((d & 0xFF) << 24))    \
+ ((a & 0xFF) | ((b & 0xFF) << 8) | ((c & 0xFF) << 16) | ((u32)(d & 0xFF) << 24)) \
 
 void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram)
 {
@@ -4975,6 +5064,8 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram)
   psx_gpu->texture_page_ptr = psx_gpu->vram_ptr;
   psx_gpu->clut_ptr = psx_gpu->vram_ptr;
 
+  psx_gpu->viewport_start_x = psx_gpu->viewport_start_y = 0;
+  psx_gpu->viewport_end_x = psx_gpu->viewport_end_y = 0;
   psx_gpu->mask_msb = 0;
 
   psx_gpu->texture_window_x = 0;
@@ -5004,10 +5095,16 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram)
   psx_gpu->dither_table[1] = dither_table_row(2, -2, 3, -1);
   psx_gpu->dither_table[2] = dither_table_row(-3, 1, -4, 0);
   psx_gpu->dither_table[3] = dither_table_row(3, -1, 2, -2);
+  psx_gpu->allow_dithering = 1;
 
   psx_gpu->primitive_type = PRIMITIVE_TYPE_UNKNOWN;
 
   psx_gpu->saved_hres = 256;
+  psx_gpu->hacks_active = 0;
+
+  // check some offsets, asm relies on these
+  psx_gpu->reserved_a[(offsetof(psx_gpu_struct, test_mask) == 0) - 1] = 0;
+  psx_gpu->reserved_a[(offsetof(psx_gpu_struct, blocks) == psx_gpu_blocks_offset) - 1] = 0;
 }
 
 u64 get_us(void)