gpu_neon: rework buffering to reduce flickering
author notaz <notasas@gmail.com>
Mon, 23 Oct 2023 18:00:35 +0000 (21:00 +0300)
committer notaz <notasas@gmail.com>
Mon, 23 Oct 2023 19:27:39 +0000 (22:27 +0300)
... maybe

notaz/pcsx_rearmed#324

16 files changed:
frontend/plat_sdl.c
frontend/plugin_lib.c
frontend/plugin_lib.h
plugins/dfxvideo/gpulib_if.c
plugins/gpu-gles/gpulib_if.c
plugins/gpu_neon/psx_gpu/psx_gpu.c
plugins/gpu_neon/psx_gpu/psx_gpu.h
plugins/gpu_neon/psx_gpu/psx_gpu_4x.c
plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h
plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c
plugins/gpu_neon/psx_gpu/psx_gpu_parse.c
plugins/gpu_neon/psx_gpu_if.c
plugins/gpu_unai/gpulib_if.cpp
plugins/gpulib/gpu.c
plugins/gpulib/gpu.h
plugins/gpulib/vout_pl.c

index 5f29b90..c557025 100644 (file)
@@ -328,6 +328,13 @@ void plat_video_menu_end(void)
 
 void plat_video_menu_leave(void)
 {
+  void *fb = NULL;
+  if (plat_sdl_overlay != NULL || plat_sdl_gl_active)
+    fb = shadow_fb;
+  else if (plat_sdl_screen)
+    fb = plat_sdl_screen->pixels;
+  if (fb)
+    memset(fb, 0, g_menuscreen_w * g_menuscreen_h * 2);
   in_menu = 0;
 }
 
index 2339028..50aba22 100644 (file)
@@ -134,7 +134,7 @@ static __attribute__((noinline)) void draw_active_chans(int vout_w, int vout_h)
 
        static const unsigned short colors[2] = { 0x1fe3, 0x0700 };
        unsigned short *dest = (unsigned short *)pl_vout_buf +
-               vout_w * (vout_h - HUD_HEIGHT) + vout_w / 2 - 192/2;
+               pl_vout_w * (vout_h - HUD_HEIGHT) + pl_vout_w / 2 - 192/2;
        unsigned short *d, p;
        int c, x, y;
 
@@ -149,7 +149,7 @@ static __attribute__((noinline)) void draw_active_chans(int vout_w, int vout_h)
                     (fmod_chans & (1<<c)) ? 0xf000 :
                     (noise_chans & (1<<c)) ? 0x001f :
                     colors[c & 1];
-               for (y = 0; y < 8; y++, d += vout_w)
+               for (y = 0; y < 8; y++, d += pl_vout_w)
                        for (x = 0; x < 8; x++)
                                d[x] = p;
        }
@@ -302,10 +302,16 @@ static void pl_vout_set_mode(int w, int h, int raw_w, int raw_h, int bpp)
        menu_notify_mode_change(pl_vout_w, pl_vout_h, pl_vout_bpp);
 }
 
+static int flip_clear_counter;
+
+void pl_force_clear(void)
+{
+       flip_clear_counter = 2;
+}
+
 static void pl_vout_flip(const void *vram, int stride, int bgr24,
        int x, int y, int w, int h, int dims_changed)
 {
-       static int clear_counter;
        unsigned char *dest = pl_vout_buf;
        const unsigned short *src = vram;
        int dstride = pl_vout_w, h1 = h;
@@ -332,15 +338,15 @@ static void pl_vout_flip(const void *vram, int stride, int bgr24,
        doffs = xoffs + y * dstride;
 
        if (dims_changed)
-               clear_counter = 2;
+               flip_clear_counter = 2;
 
-       if (clear_counter > 0) {
+       if (flip_clear_counter > 0) {
                if (pl_plat_clear)
                        pl_plat_clear();
                else
                        memset(pl_vout_buf, 0,
                                dstride * h_full * pl_vout_bpp / 8);
-               clear_counter--;
+               flip_clear_counter--;
        }
 
        if (pl_plat_blit)
index 6b2d718..b5d3114 100644 (file)
@@ -45,6 +45,7 @@ void  pl_start_watchdog(void);
 void *pl_prepare_screenshot(int *w, int *h, int *bpp);
 void  pl_init(void);
 void  pl_switch_dispmode(void);
+void  pl_force_clear(void);
 
 void  pl_timing_prepare(int is_pal);
 void  pl_frame_limit(void);
index ac86f37..29cf13f 100644 (file)
@@ -315,7 +315,7 @@ void renderer_notify_res_change(void)
 {
 }
 
-void renderer_notify_scanout_x_change(int x, int w)
+void renderer_notify_scanout_change(int x, int y)
 {
 }
 
index bde209e..3c76182 100644 (file)
@@ -514,7 +514,7 @@ void renderer_notify_res_change(void)
 {
 }
 
-void renderer_notify_scanout_x_change(int x, int w)
+void renderer_notify_scanout_change(int x, int y)
 {
 }
 
index ea3641f..62080f3 100644 (file)
@@ -15,6 +15,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
+#include <stddef.h>
 #include <string.h>
 #include <assert.h>
 
@@ -23,6 +24,7 @@
 #include "vector_ops.h"
 #endif
 #include "psx_gpu_simd.h"
+#include "psx_gpu_offsets.h"
 
 #if 0
 void dump_r_d(const char *name, void *dump);
@@ -5012,6 +5014,9 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram)
   psx_gpu->primitive_type = PRIMITIVE_TYPE_UNKNOWN;
 
   psx_gpu->saved_hres = 256;
+
+  // layout check: the index is 0 when offsetof(blocks) matches psx_gpu_blocks_offset, -1 otherwise
+  psx_gpu->reserved_a[(offsetof(psx_gpu_struct, blocks) == psx_gpu_blocks_offset) - 1] = 0;
 }
 
 u64 get_us(void)
index 957b434..bac2099 100644 (file)
@@ -141,8 +141,6 @@ typedef struct
   u32 triangle_color;
   u32 dither_table[4];
 
-  u32 uvrgb_phase;
-
   struct render_block_handler_struct *render_block_handler;
   void *texture_page_ptr;
   void *texture_page_base;
@@ -150,6 +148,8 @@ typedef struct
   u16 *vram_ptr;
   u16 *vram_out_ptr;
 
+  u32 uvrgb_phase;
+
   u16 render_state_base;
   u16 render_state;
 
@@ -194,15 +194,15 @@ typedef struct
   s16 saved_viewport_start_y;
   s16 saved_viewport_end_x;
   s16 saved_viewport_end_y;
-  u8  enhancement_buf_by_x16[64];    // 0-3 specifying which buf
-  u16 enhancement_buf_start[4];      // x pos where buf[n] begins
-
-  u16 enhancement_scanout_x[4];
-  u16 enhancement_scanout_select;
+  struct psx_gpu_scanout {
+    u16 x, y, w, h;
+  } enhancement_scanouts[4];         // 0-3 specifying which buf to use
+  u16 enhancement_scanout_eselect;   // eviction selector
+  u16 enhancement_current_buf;
 
   // Align up to 64 byte boundary to keep the upcoming buffers cache line
   // aligned, also make reachable with single immediate addition
-  u8 reserved_a[142];
+  u8 reserved_a[188 + 9*4 - 9*sizeof(void *)];
 
   // 8KB
   block_struct blocks[MAX_BLOCKS_PER_ROW];
index bd6c7a1..7b3ee85 100644 (file)
@@ -1,11 +1,3 @@
-#define select_enhancement_buf_index(psx_gpu, x) \\r
-  ((psx_gpu)->enhancement_buf_by_x16[(u32)(x) / \\r
-    (1024u / sizeof((psx_gpu)->enhancement_buf_by_x16))])\r
-\r
-#define select_enhancement_buf_ptr(psx_gpu, x) \\r
-  ((psx_gpu)->enhancement_buf_ptr + \\r
-    (select_enhancement_buf_index(psx_gpu, x) << 20))\r
-\r
 #if !defined(NEON_BUILD) || defined(SIMD_BUILD)\r
 \r
 #ifndef zip_4x32b\r
@@ -325,12 +317,12 @@ render_block_handler_struct render_sprite_block_handlers_4x[] =
   render_sprite_blocks_switch_block_4x()\r
 };\r
 \r
-\r
 void render_sprite_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v,\r
  s32 width, s32 height, u32 flags, u32 color)\r
 {\r
   s32 x_right = x + width - 1;\r
   s32 y_bottom = y + height - 1;\r
+  s16 end_x;\r
 \r
 #ifdef PROFILE\r
   sprites++;\r
@@ -352,8 +344,12 @@ void render_sprite_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v,
     height -= clip;\r
   }\r
 \r
-  if(x_right > psx_gpu->viewport_end_x)\r
-    width -= x_right - psx_gpu->viewport_end_x;\r
+  end_x = psx_gpu->viewport_end_x;\r
+  if (end_x - psx_gpu->viewport_start_x + 1 > 512)\r
+    end_x = psx_gpu->viewport_start_x + 511;\r
+\r
+  if(x_right > end_x)\r
+    width -= x_right - end_x;\r
 \r
   if(y_bottom > psx_gpu->viewport_end_y)\r
     height -= y_bottom - psx_gpu->viewport_end_y;\r
@@ -361,7 +357,9 @@ void render_sprite_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v,
   if((width <= 0) || (height <= 0))\r
     return;\r
 \r
-  psx_gpu->vram_out_ptr = select_enhancement_buf_ptr(psx_gpu, x);\r
+  if (!psx_gpu->enhancement_current_buf_ptr)\r
+    return;\r
+  psx_gpu->vram_out_ptr = psx_gpu->enhancement_current_buf_ptr;\r
 \r
   x *= 2;\r
   y *= 2;\r
index 161384e..2f8a646 100644 (file)
@@ -1,7 +1,6 @@
 #ifndef __P_PSX_GPU_OFFSETS_H__
 #define __P_PSX_GPU_OFFSETS_H__
 
-#define psx_gpu_test_mask_offset                          0x0
 #define psx_gpu_uvrg_offset                               0x10
 #define psx_gpu_uvrg_dx_offset                            0x20
 #define psx_gpu_uvrg_dy_offset                            0x30
 #define psx_gpu_b_offset                                  0x90
 #define psx_gpu_b_dy_offset                               0x94
 #define psx_gpu_triangle_area_offset                      0x98
-#define psx_gpu_texture_window_settings_offset            0x9c
 #define psx_gpu_current_texture_mask_offset               0xa0
-#define psx_gpu_viewport_mask_offset                      0xa4
 #define psx_gpu_dirty_textures_4bpp_mask_offset           0xa8
 #define psx_gpu_dirty_textures_8bpp_mask_offset           0xac
 #define psx_gpu_dirty_textures_8bpp_alternate_mask_offset 0xb0
 #define psx_gpu_triangle_color_offset                     0xb4
 #define psx_gpu_dither_table_offset                       0xb8
-#define psx_gpu_uvrgb_phase_offset                        0xc8
-#define psx_gpu_render_block_handler_offset               0xcc
-#define psx_gpu_texture_page_ptr_offset                   0xd0
-#define psx_gpu_texture_page_base_offset                  0xd4
-#define psx_gpu_clut_ptr_offset                           0xd8
-#define psx_gpu_vram_ptr_offset                           0xdc
-#define psx_gpu_vram_out_ptr_offset                       0xe0
-#define psx_gpu_render_state_base_offset                  0xe4
-#define psx_gpu_render_state_offset                       0xe6
+#define psx_gpu_texture_page_ptr_offset                   0xcc
+#define psx_gpu_texture_page_base_offset                  0xd0
+#define psx_gpu_clut_ptr_offset                           0xd4
+#define psx_gpu_vram_ptr_offset                           0xd8
+#define psx_gpu_vram_out_ptr_offset                       0xdc
+#define psx_gpu_uvrgb_phase_offset                        0xe0
 #define psx_gpu_num_spans_offset                          0xe8
 #define psx_gpu_num_blocks_offset                         0xea
 #define psx_gpu_viewport_start_x_offset                   0xec
 #define psx_gpu_viewport_end_y_offset                     0xf2
 #define psx_gpu_mask_msb_offset                           0xf4
 #define psx_gpu_triangle_winding_offset                   0xf6
-#define psx_gpu_display_area_draw_enable_offset           0xf7
 #define psx_gpu_current_texture_page_offset               0xf8
-#define psx_gpu_last_8bpp_texture_page_offset             0xf9
 #define psx_gpu_texture_mask_width_offset                 0xfa
 #define psx_gpu_texture_mask_height_offset                0xfb
-#define psx_gpu_texture_window_x_offset                   0xfc
-#define psx_gpu_texture_window_y_offset                   0xfd
-#define psx_gpu_primitive_type_offset                     0xfe
-#define psx_gpu_render_mode_offset                        0xff
-#define psx_gpu_offset_x_offset                           0x100
-#define psx_gpu_offset_y_offset                           0x102
-#define psx_gpu_clut_settings_offset                      0x104
-#define psx_gpu_texture_settings_offset                   0x106
 #define psx_gpu_reciprocal_table_ptr_offset               0x108
 #define psx_gpu_blocks_offset                             0x200
 #define psx_gpu_span_uvrg_offset_offset                   0x2200
 #define psx_gpu_span_edge_data_offset                     0x4200
 #define psx_gpu_span_b_offset_offset                      0x5200
-#define psx_gpu_texture_4bpp_cache_offset                 0x5a00
-#define psx_gpu_texture_8bpp_even_cache_offset            0x205a00
-#define psx_gpu_texture_8bpp_odd_cache_offset             0x305a00
 
 #endif /* __P_PSX_GPU_OFFSETS_H__ */
index b1de121..9b37848 100644 (file)
@@ -4,7 +4,7 @@
 #include "common.h"
 
 #define WRITE_OFFSET(f, member) \
-       fprintf(f, "#define %-50s0x%x\n", \
+       fprintf(f, "#define %-50s0x%zx\n", \
                "psx_gpu_" #member "_offset", \
                offsetof(psx_gpu_struct, member));
 
@@ -22,8 +22,10 @@ int main()
                perror("fopen");
                return 1;
        }
+       fputs("#ifndef __P_PSX_GPU_OFFSETS_H__\n", f);
+       fputs("#define __P_PSX_GPU_OFFSETS_H__\n\n", f);
 
-       WRITE_OFFSET(f, test_mask);
+       //WRITE_OFFSET(f, test_mask);
        WRITE_OFFSET(f, uvrg);
        WRITE_OFFSET(f, uvrg_dx);
        WRITE_OFFSET(f, uvrg_dy);
@@ -35,23 +37,23 @@ int main()
        WRITE_OFFSET(f, b);
        WRITE_OFFSET(f, b_dy);
        WRITE_OFFSET(f, triangle_area);
-       WRITE_OFFSET(f, texture_window_settings);
+       //WRITE_OFFSET(f, texture_window_settings);
        WRITE_OFFSET(f, current_texture_mask);
-       WRITE_OFFSET(f, viewport_mask);
+       //WRITE_OFFSET(f, viewport_mask);
        WRITE_OFFSET(f, dirty_textures_4bpp_mask);
        WRITE_OFFSET(f, dirty_textures_8bpp_mask);
        WRITE_OFFSET(f, dirty_textures_8bpp_alternate_mask);
        WRITE_OFFSET(f, triangle_color);
        WRITE_OFFSET(f, dither_table);
-       WRITE_OFFSET(f, uvrgb_phase);
-       WRITE_OFFSET(f, render_block_handler);
+       //WRITE_OFFSET(f, render_block_handler);
        WRITE_OFFSET(f, texture_page_ptr);
        WRITE_OFFSET(f, texture_page_base);
        WRITE_OFFSET(f, clut_ptr);
        WRITE_OFFSET(f, vram_ptr);
        WRITE_OFFSET(f, vram_out_ptr);
-       WRITE_OFFSET(f, render_state_base);
-       WRITE_OFFSET(f, render_state);
+       WRITE_OFFSET(f, uvrgb_phase);
+       //WRITE_OFFSET(f, render_state_base);
+       //WRITE_OFFSET(f, render_state);
        WRITE_OFFSET(f, num_spans);
        WRITE_OFFSET(f, num_blocks);
        WRITE_OFFSET(f, viewport_start_x);
@@ -60,27 +62,29 @@ int main()
        WRITE_OFFSET(f, viewport_end_y);
        WRITE_OFFSET(f, mask_msb);
        WRITE_OFFSET(f, triangle_winding);
-       WRITE_OFFSET(f, display_area_draw_enable);
+       //WRITE_OFFSET(f, display_area_draw_enable);
        WRITE_OFFSET(f, current_texture_page);
-       WRITE_OFFSET(f, last_8bpp_texture_page);
+       //WRITE_OFFSET(f, last_8bpp_texture_page);
        WRITE_OFFSET(f, texture_mask_width);
        WRITE_OFFSET(f, texture_mask_height);
-       WRITE_OFFSET(f, texture_window_x);
-       WRITE_OFFSET(f, texture_window_y);
-       WRITE_OFFSET(f, primitive_type);
-       WRITE_OFFSET(f, render_mode);
-       WRITE_OFFSET(f, offset_x);
-       WRITE_OFFSET(f, offset_y);
-       WRITE_OFFSET(f, clut_settings);
-       WRITE_OFFSET(f, texture_settings);
+       //WRITE_OFFSET(f, texture_window_x);
+       //WRITE_OFFSET(f, texture_window_y);
+       //WRITE_OFFSET(f, primitive_type);
+       //WRITE_OFFSET(f, render_mode);
+       //WRITE_OFFSET(f, offset_x);
+       //WRITE_OFFSET(f, offset_y);
+       //WRITE_OFFSET(f, clut_settings);
+       //WRITE_OFFSET(f, texture_settings);
        WRITE_OFFSET(f, reciprocal_table_ptr);
        WRITE_OFFSET(f, blocks);
        WRITE_OFFSET(f, span_uvrg_offset);
        WRITE_OFFSET(f, span_edge_data);
        WRITE_OFFSET(f, span_b_offset);
-       WRITE_OFFSET(f, texture_4bpp_cache);
-       WRITE_OFFSET(f, texture_8bpp_even_cache);
-       WRITE_OFFSET(f, texture_8bpp_odd_cache);
+       //WRITE_OFFSET(f, texture_4bpp_cache);
+       //WRITE_OFFSET(f, texture_8bpp_even_cache);
+       //WRITE_OFFSET(f, texture_8bpp_odd_cache);
+
+       fputs("\n#endif /* __P_PSX_GPU_OFFSETS_H__ */\n", f);
        fclose(f);
 
        return 0;
index 5badf6b..de227d5 100644 (file)
@@ -788,78 +788,111 @@ breakloop:
 
 #ifdef PCSX
 
-#define ENH_BUF_TABLE_STEP (1024 / sizeof(psx_gpu->enhancement_buf_by_x16))
-
-static int is_new_scanout(psx_gpu_struct *psx_gpu, int x)
+// this thing has become such a PITA, should just handle the 2048 width really
+static void update_enhancement_buf_scanouts(psx_gpu_struct *psx_gpu,
+    int x, int y, int w, int h)
 {
-  int i, scanout_x;
-  for (i = 0; i < ARRAY_SIZE(psx_gpu->enhancement_scanout_x); i++)
-  {
-    scanout_x = psx_gpu->enhancement_scanout_x[i];
-    if (x <= scanout_x && scanout_x < x + ENH_BUF_TABLE_STEP)
-    {
-      if (x != scanout_x)
-        log_anomaly("unaligned scanout x: %d,%d\n", scanout_x, x);
-      return 1;
-    }
+  int max_bufs = ARRAY_SIZE(psx_gpu->enhancement_scanouts);
+  struct psx_gpu_scanout *s;
+  int i, sel, right, bottom;
+  u32 tol_x = 48, tol_y = 16;
+  u32 intersection;
+
+  //w = (w + 15) & ~15;
+  psx_gpu->saved_hres = w;
+  assert(!(max_bufs & (max_bufs - 1)));
+  for (i = 0; i < max_bufs; i++) {
+    s = &psx_gpu->enhancement_scanouts[i];
+    if (s->x == x && s->y == y && w - s->w <= tol_x && h - s->h <= tol_y)
+      return;
   }
-  return 0;
-}
-
-static void update_enhancement_buf_table_from_hres(psx_gpu_struct *psx_gpu)
-{
-  u32 b, x;
 
-  b = 0;
-  psx_gpu->enhancement_buf_by_x16[0] = b;
-  psx_gpu->enhancement_buf_start[0] = 0;
-  for (x = 1; x < sizeof(psx_gpu->enhancement_buf_by_x16); x++)
-  {
-    if (b < 3 && is_new_scanout(psx_gpu, x * ENH_BUF_TABLE_STEP)) {
-      b++;
-      psx_gpu->enhancement_buf_start[b] = x * ENH_BUF_TABLE_STEP;
+  // evict any scanout that intersects
+  right = x + w;
+  bottom = y + h;
+  for (i = 0, sel = -1; i < max_bufs; i++) {
+    s = &psx_gpu->enhancement_scanouts[i];
+    if (s->x >= right) continue;
+    if (s->x + s->w <= x) continue;
+    if (s->y >= bottom) continue;
+    if (s->y + s->h <= y) continue;
+    // ... but allow a small intersection (within tol_x/tol_y) that some games do
+    if ((intersection = s->x + s->w - x) - 1u <= tol_x) {
+      s->w -= intersection;
+      continue;
     }
-
-    psx_gpu->enhancement_buf_by_x16[x] = b;
+    if ((intersection = s->y + s->h - y) - 1u <= tol_y) {
+      s->h -= intersection;
+      continue;
+    }
+    //printf("%4d%4d%4dx%d evicted\n", s->x, s->y, s->w, s->h);
+    s->w = 0;
+    sel = i;
+    break;
   }
-#if 0
-  printf("buf_by_x16:\n");
-  for (b = 0; b < 3; b++) {
-    int first = -1, count = 0;
-    for (x = 0; x < sizeof(psx_gpu->enhancement_buf_by_x16); x++) {
-      if (psx_gpu->enhancement_buf_by_x16[x] == b) {
-        if (first < 0) first = x;
-        count++;
+  if (sel >= 0) {
+    // 2nd intersection check
+    for (i = 0; i < max_bufs; i++) {
+      s = &psx_gpu->enhancement_scanouts[i];
+      if (!s->w)
+        continue;
+      if ((intersection = right - s->x) - 1u <= tol_x) {
+        w -= intersection;
+        break;
+      }
+      if ((intersection = bottom - s->y) - 1u <= tol_y) {
+        h -= intersection;
+        break;
       }
-    }
-    if (count) {
-      assert(first * ENH_BUF_TABLE_STEP == psx_gpu->enhancement_buf_start[b]);
-      printf("%d: %3zd-%zd\n", b, first * ENH_BUF_TABLE_STEP,
-          (first + count) * ENH_BUF_TABLE_STEP);
     }
   }
+  else
+    sel = psx_gpu->enhancement_scanout_eselect++;
+  psx_gpu->enhancement_scanout_eselect &= max_bufs - 1;
+  s = &psx_gpu->enhancement_scanouts[sel];
+  s->x = x;
+  s->y = y;
+  s->w = w;
+  s->h = h;
+
+  sync_enhancement_buffers(x, y, w, h);
+#if 0
+  printf("scanouts:\n");
+  for (i = 0; i < ARRAY_SIZE(psx_gpu->enhancement_scanouts); i++) {
+    s = &psx_gpu->enhancement_scanouts[i];
+    if (s->w)
+      printf("%4d%4d%4dx%d\n", s->x, s->y, s->w, s->h);
+  }
 #endif
 }
 
-static void update_enhancement_buf_table_from_x(psx_gpu_struct *psx_gpu,
- u32 x0, u32 len)
+static int select_enhancement_buf_index(psx_gpu_struct *psx_gpu, s32 x, s32 y)
 {
-#if 0
-  u32 x, b;
+  int i;
+  for (i = 0; i < ARRAY_SIZE(psx_gpu->enhancement_scanouts); i++) {
+    const struct psx_gpu_scanout *s = &psx_gpu->enhancement_scanouts[i];
+    if (s->x <= x && x < s->x + s->w &&
+        s->y <= y && y < s->y + s->h)
+      return i;
+  }
+  return -1;
+}
 
-  for (x = x0, b = 0; x >= len; b++)
-    x -= len;
-  if (b > 3)
-    b = 3;
+#define select_enhancement_buf_by_index(psx_gpu_, i_) \
+  ((psx_gpu_)->enhancement_buf_ptr + ((i_) << 20))
 
-  memset(psx_gpu->enhancement_buf_by_x16 + x0 / ENH_BUF_TABLE_STEP,
-   b, (len + ENH_BUF_TABLE_STEP - 1) / ENH_BUF_TABLE_STEP);
-#endif
+static void *select_enhancement_buf_ptr(psx_gpu_struct *psx_gpu, s32 x, s32 y)
+{
+  int i = select_enhancement_buf_index(psx_gpu, x, y);
+  return i >= 0 ? select_enhancement_buf_by_index(psx_gpu, i) : NULL;
 }
 
-#define select_enhancement_buf(psx_gpu) \
-  psx_gpu->enhancement_current_buf_ptr = \
-    select_enhancement_buf_ptr(psx_gpu, psx_gpu->saved_viewport_start_x)
+static void select_enhancement_buf(psx_gpu_struct *psx_gpu)
+{
+  s32 x = psx_gpu->saved_viewport_start_x;
+  s32 y = psx_gpu->saved_viewport_start_y;
+  psx_gpu->enhancement_current_buf_ptr = select_enhancement_buf_ptr(psx_gpu, x, y);
+}
 
 #define enhancement_disable() { \
   psx_gpu->vram_out_ptr = psx_gpu->vram_ptr; \
@@ -870,13 +903,19 @@ static void update_enhancement_buf_table_from_x(psx_gpu_struct *psx_gpu,
   psx_gpu->uvrgb_phase = 0x8000; \
 }
 
-#define enhancement_enable() { \
-  psx_gpu->vram_out_ptr = psx_gpu->enhancement_current_buf_ptr; \
-  psx_gpu->viewport_start_x = psx_gpu->saved_viewport_start_x * 2; \
-  psx_gpu->viewport_start_y = psx_gpu->saved_viewport_start_y * 2; \
-  psx_gpu->viewport_end_x = psx_gpu->saved_viewport_end_x * 2 + 1; \
-  psx_gpu->viewport_end_y = psx_gpu->saved_viewport_end_y * 2 + 1; \
-  psx_gpu->uvrgb_phase = 0x7fff; \
+static int enhancement_enable(psx_gpu_struct *psx_gpu)
+{
+  if (!psx_gpu->enhancement_current_buf_ptr)
+    return 0;
+  psx_gpu->vram_out_ptr = psx_gpu->enhancement_current_buf_ptr;
+  psx_gpu->viewport_start_x = psx_gpu->saved_viewport_start_x * 2;
+  psx_gpu->viewport_start_y = psx_gpu->saved_viewport_start_y * 2;
+  psx_gpu->viewport_end_x = psx_gpu->saved_viewport_end_x * 2 + 1;
+  psx_gpu->viewport_end_y = psx_gpu->saved_viewport_end_y * 2 + 1;
+  if (psx_gpu->viewport_end_x - psx_gpu->viewport_start_x + 1 > 1024)
+    psx_gpu->viewport_end_x = psx_gpu->viewport_start_x + 1023;
+  psx_gpu->uvrgb_phase = 0x7fff;
+  return 1;
 }
 
 #define shift_vertices3(v) { \
@@ -971,17 +1010,10 @@ void scale2x_tiles8(void *dst, const void *src, int w8, int h)
 
 static int disable_main_render;
 
-static int check_enhanced_range(psx_gpu_struct *psx_gpu, int x, int x_end)
+// simple check for a case where no clipping is used
+//  - now handled by adjusting the viewport
+static int check_enhanced_range(psx_gpu_struct *psx_gpu, int x, int y)
 {
-  // reject to avoid oveflowing the 1024 width
-  // (assume some offscreen render-to-texture thing)
-  int fb_index;
-  if (x < 0)
-    return 1;
-  fb_index = select_enhancement_buf_index(psx_gpu, x);
-  if (x >= psx_gpu->enhancement_buf_start[fb_index] + 512)
-    return 0;
-
   return 1;
 }
 
@@ -1067,7 +1099,9 @@ static void do_triangle_enhanced(psx_gpu_struct *psx_gpu,
   if (!check_enhanced_range(psx_gpu, vertex_ptrs[0]->x, vertex_ptrs[2]->x))
     return;
 
-  enhancement_enable();
+  if (!enhancement_enable(psx_gpu))
+    return;
+
   shift_vertices3(vertex_ptrs);
   shift_triangle_area();
   render_triangle_p(psx_gpu, vertex_ptrs, current_command);
@@ -1198,7 +1232,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
         u32 width = list_s16[4] & 0x3FF;
         u32 height = list_s16[5] & 0x1FF;
         u32 color = list[0] & 0xFFFFFF;
-        u32 i1, i2;
+        s32 i1, i2;
 
         x &= ~0xF;
         width = ((width + 0xF) & ~0xF);
@@ -1207,16 +1241,14 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
 
         do_fill(psx_gpu, x, y, width, height, color);
 
-        i1 = select_enhancement_buf_index(psx_gpu, x);
-        i2 = select_enhancement_buf_index(psx_gpu, x + width - 1);
-        if (i1 != i2) {
+        i1 = select_enhancement_buf_index(psx_gpu, x, y);
+        i2 = select_enhancement_buf_index(psx_gpu, x + width - 1, y + height - 1);
+        if (i1 < 0 || i1 != i2) {
           sync_enhancement_buffers(x, y, width, height);
           break;
         }
-        if (x >= psx_gpu->enhancement_buf_start[i1] + psx_gpu->saved_hres)
-          break;
 
-        psx_gpu->vram_out_ptr = select_enhancement_buf_ptr(psx_gpu, x);
+        psx_gpu->vram_out_ptr = select_enhancement_buf_by_index(psx_gpu, i1);
         x *= 2;
         y *= 2;
         width *= 2;
@@ -1346,8 +1378,8 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
         vertexes[1].y = list_s16[5] + psx_gpu->offset_y;
 
         render_line(psx_gpu, vertexes, current_command, list[0], 0);
-        enhancement_enable();
-        render_line(psx_gpu, vertexes, current_command, list[0], 1);
+        if (enhancement_enable(psx_gpu))
+          render_line(psx_gpu, vertexes, current_command, list[0], 1);
         break;
       }
   
@@ -1370,8 +1402,8 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
 
           enhancement_disable();
           render_line(psx_gpu, vertexes, current_command, list[0], 0);
-          enhancement_enable();
-          render_line(psx_gpu, vertexes, current_command, list[0], 1);
+          if (enhancement_enable(psx_gpu))
+            render_line(psx_gpu, vertexes, current_command, list[0], 1);
 
           list_position++;
           num_vertexes++;
@@ -1406,8 +1438,8 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
         vertexes[1].y = list_s16[7] + psx_gpu->offset_y;
 
         render_line(psx_gpu, vertexes, current_command, 0, 0);
-        enhancement_enable();
-        render_line(psx_gpu, vertexes, current_command, 0, 1);
+        if (enhancement_enable(psx_gpu))
+          render_line(psx_gpu, vertexes, current_command, 0, 1);
         break;
       }
  
@@ -1439,8 +1471,8 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
 
           enhancement_disable();
           render_line(psx_gpu, vertexes, current_command, 0, 0);
-          enhancement_enable();
-          render_line(psx_gpu, vertexes, current_command, 0, 1);
+          if (enhancement_enable(psx_gpu))
+            render_line(psx_gpu, vertexes, current_command, 0, 1);
 
           list_position += 2;
           num_vertexes++;
@@ -1632,8 +1664,6 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
       {
         s16 viewport_start_x = list[0] & 0x3FF;
         s16 viewport_start_y = (list[0] >> 10) & 0x1FF;
-        u32 w;
-        s32 d;
 
         if(viewport_start_x == psx_gpu->viewport_start_x &&
          viewport_start_y == psx_gpu->viewport_start_y)
@@ -1645,13 +1675,6 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
         psx_gpu->saved_viewport_start_x = viewport_start_x;
         psx_gpu->saved_viewport_start_y = viewport_start_y;
 
-        w = (u32)psx_gpu->viewport_end_x - (u32)viewport_start_x + 1;
-        d = psx_gpu->saved_hres - w;
-        if(-16 <= d && d <= 16)
-        {
-          update_enhancement_buf_table_from_x(psx_gpu,
-           viewport_start_x, w);
-        }
         select_enhancement_buf(psx_gpu);
 
 #ifdef TEXTURE_CACHE_4BPP
@@ -1668,8 +1691,6 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
       {
         s16 viewport_end_x = list[0] & 0x3FF;
         s16 viewport_end_y = (list[0] >> 10) & 0x1FF;
-        u32 w;
-        s32 d;
 
         if(viewport_end_x == psx_gpu->viewport_end_x &&
          viewport_end_y == psx_gpu->viewport_end_y)
@@ -1682,13 +1703,6 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
         psx_gpu->saved_viewport_end_x = viewport_end_x;
         psx_gpu->saved_viewport_end_y = viewport_end_y;
 
-        w = (u32)viewport_end_x - (u32)psx_gpu->viewport_start_x + 1;
-        d = psx_gpu->saved_hres - w;
-        if(-16 <= d && d <= 16)
-        {
-          update_enhancement_buf_table_from_x(psx_gpu,
-           psx_gpu->viewport_start_x, w);
-        }
         select_enhancement_buf(psx_gpu);
 
 #ifdef TEXTURE_CACHE_4BPP
index a4b18ab..a1fbb91 100644 (file)
 #include <assert.h>
 
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#ifndef min
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#endif
+#ifndef max
+#define max(a, b) ((a) > (b) ? (a) : (b))
+#endif
 
 extern const unsigned char cmd_lengths[256];
 #define command_lengths cmd_lengths
@@ -60,7 +66,9 @@ int do_cmd_list(uint32_t *list, int count, int *last_cmd)
 static void *get_enhancement_bufer(int *x, int *y, int *w, int *h,
  int *vram_h)
 {
-  uint16_t *ret = select_enhancement_buf_ptr(&egpu, *x);
+  uint16_t *ret = select_enhancement_buf_ptr(&egpu, *x, *y);
+  if (ret == NULL)
+    return NULL;
 
   *x *= 2;
   *y *= 2;
@@ -114,44 +122,28 @@ void renderer_finish(void)
 static __attribute__((noinline)) void
 sync_enhancement_buffers(int x, int y, int w, int h)
 {
-  const int step_x = 1024 / sizeof(egpu.enhancement_buf_by_x16);
-  int hres = egpu.saved_hres;
-  int x_buf, w1, s, fb_index;
-  u16 *src, *dst;
-
-  if (egpu.enhancement_buf_ptr == NULL)
-    return;
-
-  w += x & (step_x - 1);
-  x &= ~(step_x - 1);
-  w = (w + step_x - 1) & ~(step_x - 1);
-  if (y + h > 512)
-    h = 512 - y;
-
-  // find x_buf which is an offset into this enhancement_buf
-  fb_index = egpu.enhancement_buf_by_x16[x / step_x];
-  x_buf = x - egpu.enhancement_buf_start[fb_index];
-
-  while (w > 0) {
-    fb_index = egpu.enhancement_buf_by_x16[x / step_x];
-    for (w1 = 0; w > 0 && x_buf < hres; x_buf += step_x, w1++, w -= step_x)
-      if (fb_index != egpu.enhancement_buf_by_x16[x / step_x + w1])
-        break;
-    // skip further unneeded data, if any
-    for (s = 0; w > 0; s++, w -= step_x)
-      if (fb_index != egpu.enhancement_buf_by_x16[x / step_x + w1 + s])
-        break;
-
-    if (w1 > 0) {
-      src = gpu.vram + y * 1024 + x;
-      dst = select_enhancement_buf_ptr(&egpu, x);
-      dst += (y * 1024 + x) * 2;
-      scale2x_tiles8(dst, src, w1 * step_x / 8, h);
-    }
-
-    x += (w1 + s) * step_x;
-    x &= 0x3ff;
-    x_buf = 0;
+  int i, right = x + w, bottom = y + h;
+  const u16 *src = gpu.vram;
+  // use these because the scanout struct may hold reduced w, h
+  // due to intersection stuff, see the update_enhancement_buf_scanouts() mess
+  int s_w = max(gpu.screen.hres, gpu.screen.w);
+  int s_h = gpu.screen.vres;
+  s_w = min(s_w, 512);
+  for (i = 0; i < ARRAY_SIZE(egpu.enhancement_scanouts); i++) {
+    const struct psx_gpu_scanout *s = &egpu.enhancement_scanouts[i];
+    u16 *dst = select_enhancement_buf_by_index(&egpu, i);
+    int x1, x2, y1, y2;
+    if (s->w == 0) continue;
+    if (s->x >= right) continue;
+    if (s->x + s_w <= x) continue;
+    if (s->y >= bottom) continue;
+    if (s->y + s_h <= y) continue;
+    x1 = max(x, s->x);
+    x2 = min(right, s->x + s_w);
+    y1 = max(y, s->y);
+    y2 = min(bottom, s->y + s_h);
+    scale2x_tiles8(dst + y1 * 1024*2 + x1 * 2,
+        src + y1 * 1024 + x1, (x2 - x1 + 7) / 8u, y2 - y1);
   }
 }
 
@@ -166,8 +158,8 @@ void renderer_update_caches(int x, int y, int w, int h, int state_changed)
 
   if (gpu.state.enhancement_active) {
     if (state_changed) {
-      egpu.saved_hres = 0;
-      renderer_notify_res_change();
+      memset(egpu.enhancement_scanouts, 0, sizeof(egpu.enhancement_scanouts));
+      egpu.enhancement_scanout_eselect = 0;
       return;
     }
     sync_enhancement_buffers(x, y, w, h);
@@ -190,45 +182,15 @@ void renderer_set_interlace(int enable, int is_odd)
 
 void renderer_notify_res_change(void)
 {
-  renderer_notify_scanout_x_change(gpu.screen.src_x, gpu.screen.hres);
+  renderer_notify_scanout_change(gpu.screen.src_x, gpu.screen.src_y);
 }
 
-void renderer_notify_scanout_x_change(int x, int w)
+void renderer_notify_scanout_change(int x, int y)
 {
-  int hres = (w + 15) & ~15;
-  int max_bufs = ARRAY_SIZE(egpu.enhancement_scanout_x);
-  int need_update = 0;
-  int i;
-
-  if (!gpu.state.enhancement_active)
+  if (!gpu.state.enhancement_active || !egpu.enhancement_buf_ptr)
     return;
 
-  assert(!(max_bufs & (max_bufs - 1)));
-  if (egpu.saved_hres != hres) {
-    for (i = 0; i < max_bufs; i++)
-      egpu.enhancement_scanout_x[i] = x;
-    need_update = 1;
-  }
-
-  if (egpu.enhancement_scanout_x[egpu.enhancement_scanout_select] != x)
-  {
-    // maybe triple buffering?
-    for (i = 0; i < max_bufs; i++)
-      if (egpu.enhancement_scanout_x[i] == x)
-        break;
-    if (i == max_bufs)
-      need_update = 1;
-
-    egpu.enhancement_scanout_x[egpu.enhancement_scanout_select] = x;
-  }
-  egpu.enhancement_scanout_select++;
-  egpu.enhancement_scanout_select &= max_bufs - 1;
-  if (need_update)
-  {
-    egpu.saved_hres = hres;
-    update_enhancement_buf_table_from_hres(&egpu);
-    sync_enhancement_buffers(0, 0, 1024, 512);
-  }
+  update_enhancement_buf_scanouts(&egpu, x, y, gpu.screen.hres, gpu.screen.vres);
 }
 
 #include "../../frontend/plugin_lib.h"
index 140193e..2079431 100644 (file)
@@ -317,7 +317,7 @@ void renderer_notify_res_change(void)
   */
 }
 
-void renderer_notify_scanout_x_change(int x, int w)
+void renderer_notify_scanout_change(int x, int y)
 {
 }
 
index 6814819..dfd4366 100644 (file)
@@ -299,6 +299,7 @@ long GPUshutdown(void)
 void GPUwriteStatus(uint32_t data)
 {
   uint32_t cmd = data >> 24;
+  int src_x, src_y;
 
   if (cmd < ARRAY_SIZE(gpu.regs)) {
     if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data)
@@ -328,14 +329,17 @@ void GPUwriteStatus(uint32_t data)
       gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
       break;
     case 0x05:
-      gpu.screen.src_x = data & 0x3ff;
-      gpu.screen.src_y = (data >> 10) & 0x1ff;
-      renderer_notify_scanout_x_change(gpu.screen.src_x, gpu.screen.hres);
-      if (gpu.frameskip.set) {
-        decide_frameskip_allow(gpu.ex_regs[3]);
-        if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
-          decide_frameskip();
-          gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
+      src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
+      if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) {
+        gpu.screen.src_x = src_x;
+        gpu.screen.src_y = src_y;
+        renderer_notify_scanout_change(src_x, src_y);
+        if (gpu.frameskip.set) {
+          decide_frameskip_allow(gpu.ex_regs[3]);
+          if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
+            decide_frameskip();
+            gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
+          }
         }
       }
       break;
@@ -869,7 +873,7 @@ long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
         GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
       }
       renderer_sync_ecmds(gpu.ex_regs);
-      renderer_update_caches(0, 0, 1024, 512, 1);
+      renderer_update_caches(0, 0, 1024, 512, 0);
       break;
   }
 
index 28458cf..82fbe94 100644 (file)
@@ -135,7 +135,7 @@ void renderer_set_config(const struct rearmed_cbs *config);
 void renderer_notify_res_change(void);
 void renderer_notify_update_lace(int updated);
 void renderer_sync(void);
-void renderer_notify_scanout_x_change(int x, int w);
+void renderer_notify_scanout_change(int x, int y);
 
 int  vout_init(void);
 int  vout_finish(void);
index c166b57..30245b8 100644 (file)
@@ -93,6 +93,8 @@ void vout_update(void)
     if (!gpu.state.enhancement_was_active)
       return; // buffer not ready yet
     vram = gpu.get_enhancement_bufer(&src_x, &src_y, &w, &h, &vram_h);
+    if (vram == NULL)
+      return;
     x *= 2; y *= 2;
     src_x2 *= 2;
   }