gpulib: fix missing updates on vram copy

author notaz <notasas@gmail.com>
Sat, 11 Apr 2026 23:38:25 +0000 (02:38 +0300)

committer notaz <notasas@gmail.com>
Sat, 11 Apr 2026 23:51:44 +0000 (02:51 +0300)
diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c

index 0e967d6..b347046 100644 (file)
--- a/plugins/gpulib/gpu.c
+++ b/plugins/gpulib/gpu.c
@@ -239,18 +239,29 @@ static noinline void decide_frameskip(struct psx_gpu *gpu, uint32_t flip_delay)
      gpu->frameskip.active = 0;
  }
  
+static int check_screen_intersect(struct psx_gpu *gpu, int x, int y, int w, int h) // returns 1 if rect (x, y, w, h) overlaps the rect (screen.src_x, screen.src_y, screen.w, screen.h), else 0
+{
+  int32_t screen_r = gpu->screen.src_x + gpu->screen.w; // exclusive right edge of the screen rect
+  int32_t screen_b = gpu->screen.src_y + gpu->screen.h; // exclusive bottom edge of the screen rect
+  int32_t dst_r = x + w, dst_b = y + h; // exclusive right/bottom edges of the tested rect
+  int32_t no_intersect; // sign bit accumulates "some separating edge exists"
+  no_intersect  = screen_r - x - 1; // negative iff screen_r <= x (rect starts at/after the screen's right edge)
+  no_intersect |= screen_b - y - 1; // negative iff screen_b <= y (rect starts at/below the screen's bottom edge)
+  no_intersect |= dst_r - gpu->screen.src_x - 1; // negative iff dst_r <= src_x (rect ends at/before the screen's left edge)
+  no_intersect |= dst_b - gpu->screen.src_y - 1; // negative iff dst_b <= src_y (rect ends at/above the screen's top edge)
+  no_intersect >>= 31; // keep only the sign: nonzero iff any term above was negative (right shift of a negative value is implementation-defined; the '!' below only needs zero vs nonzero)
+  return !no_intersect; // 1 = rectangles overlap, 0 = disjoint
+}
+
  static noinline void check_draw_to_display(struct psx_gpu *gpu)
  {
    uint32_t cmd_e3 = gpu->ex_regs[3];
-  uint32_t x1 = cmd_e3 & 0x3ff,    y1 = (cmd_e3 >> 10) & 0x3ff;
-  uint32_t x2 = gpu->screen.src_x, y2 = gpu->screen.src_y;
-  uint32_t w = gpu->screen.w,      h = gpu->screen.h;
-  uint32_t no_intersect =
-    x1 + w <= x2 || x2 + w <= x1 || y1 + h <= y2 || y2 + h <= y1;
-  gpu->state.draw_display_intersect = !no_intersect;
+  uint32_t x1 = cmd_e3 & 0x3ff, y1 = (cmd_e3 >> 10) & 0x3ff;
+  int intersect = check_screen_intersect(gpu, x1, y1, gpu->screen.w, gpu->screen.h);
+  gpu->state.draw_display_intersect = intersect;
    // no frameskip if it decides to draw to display area,
    // but not for interlace since it'll most likely always do that
-  gpu->frameskip.allow = no_intersect || (gpu->status & PSX_GPU_STATUS_INTERLACE);
+  gpu->frameskip.allow = !intersect || (gpu->status & PSX_GPU_STATUS_INTERLACE);
  }
  
  static void flush_cmd_buffer(struct psx_gpu *gpu);
@@ -598,20 +609,12 @@ static void finish_vram_transfer(struct psx_gpu *gpu, int is_read, int is_async)
    if (is_read)
      gpu->status &= ~PSX_GPU_STATUS_IMG;
    else {
-    int32_t screen_r = gpu->screen.src_x + gpu->screen.hres;
-    int32_t screen_b = gpu->screen.src_y + gpu->screen.vres;
-    int32_t dma_r = gpu->dma_start.x + gpu->dma_start.w;
-    int32_t dma_b = gpu->dma_start.y + gpu->dma_start.h;
-    int32_t not_dirty;
-    not_dirty  = screen_r - gpu->dma_start.x - 1;
-    not_dirty |= screen_b - gpu->dma_start.y - 1;
-    not_dirty |= dma_r - gpu->screen.src_x - 1;
-    not_dirty |= dma_b - gpu->screen.src_y - 1;
-    not_dirty >>= 31;
+    int intersect = check_screen_intersect(gpu, gpu->dma_start.x, gpu->dma_start.y,
+                      gpu->dma_start.w, gpu->dma_start.h);
      log_io(gpu, "dma %3d,%3d %dx%d scr %3d,%3d %3dx%3d -> dirty %d\n",
        gpu->dma_start.x, gpu->dma_start.y, gpu->dma_start.w, gpu->dma_start.h,
-      gpu->screen.src_x, gpu->screen.src_y, gpu->screen.hres, gpu->screen.vres, !not_dirty);
-    gpu->state.fb_dirty_display_area |= !not_dirty;
+      gpu->screen.src_x, gpu->screen.src_y, gpu->screen.hres, gpu->screen.vres, intersect);
+    gpu->state.fb_dirty_display_area |= intersect;
      gpu->state.fb_dirty = 1;
      if (!is_async)
        renderer_update_caches(gpu->dma_start.x, gpu->dma_start.y,
@@ -621,8 +624,27 @@ static void finish_vram_transfer(struct psx_gpu *gpu, int is_read, int is_async)
      gpu->gpu_state_change(PGS_VRAM_TRANSFER_END, 0);
  }
  
-int do_vram_copy(uint16_t *vram, const uint32_t *ex_regs,
-      const uint32_t *params, int *cpu_cycles)
+int do_vram_copy_pre(struct psx_gpu *gpu, const uint32_t *params, int *cpu_cycles) // pre-pass for a vram copy command: sets *cpu_cycles, updates dirty flags; returns 1 if the caller should go on to do_vram_copy(), 0 if the copy is skipped
+{
+  const uint32_t sx =  LE32TOH(params[1]) & 0x3FF; // source x: low 10 bits of params[1]
+  const uint32_t sy = (LE32TOH(params[1]) >> 16) & 0x1FF; // source y: 9 bits from the upper half of params[1]
+  const uint32_t dx =  LE32TOH(params[2]) & 0x3FF; // destination x, decoded like sx from params[2]
+  const uint32_t dy = (LE32TOH(params[2]) >> 16) & 0x1FF; // destination y, decoded like sy from params[2]
+  uint32_t w =  ((LE32TOH(params[3]) - 1) & 0x3FF) + 1; // width in 1..0x400; an encoded 0 wraps to 0x400
+  uint32_t h = (((LE32TOH(params[3]) >> 16) - 1) & 0x1FF) + 1; // height in 1..0x200; an encoded 0 wraps to 0x200
+  int intersect;
+
+  *cpu_cycles = gput_copy(w, h); // overwrites (does not accumulate); assigned even when the copy is skipped below
+  if (sx == dx && sy == dy && !(gpu->ex_regs[6] & 0x8000)) // same source and destination with bit 15 of ex_regs[6] clear - NOTE(review): presumably a no-op copy unless that bit forces a mask-bit write; confirm against do_vram_copy()
+    return 0;
+
+  intersect = check_screen_intersect(gpu, dx, dy, w, h); // tests the destination rect only
+  gpu->state.fb_dirty_display_area |= intersect; // sticky: only ever set here, never cleared
+  gpu->state.fb_dirty = 1; // set for every copy that is not skipped
+  return 1;
+}
+
+int do_vram_copy(uint16_t *vram, const uint32_t *ex_regs, const uint32_t *params)
  {
    const uint32_t sx =  LE32TOH(params[1]) & 0x3FF;
    const uint32_t sy = (LE32TOH(params[1]) >> 16) & 0x1FF;
@@ -634,10 +656,6 @@ int do_vram_copy(uint16_t *vram, const uint32_t *ex_regs,
    uint16_t lbuf[128];
    uint32_t x, y;
  
-  *cpu_cycles += gput_copy(w, h);
-  if (sx == dx && sy == dy && msb == 0)
-    return 4;
-
    renderer_flush_queues();
  
    if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
@@ -855,8 +873,8 @@ static noinline int do_cmd_buffer(struct psx_gpu *gpu, uint32_t *data, int count
          break;
        *cycles_sum += *cycles_last;
        *cycles_last = 0;
-      do_vram_copy(gpu->vram, gpu->ex_regs, data + pos, cycles_last);
-      vram_dirty = 1;
+      if (do_vram_copy_pre(gpu, data + pos, cycles_last))
+        do_vram_copy(gpu->vram, gpu->ex_regs, data + pos);
        pos += 4;
        continue;
      case 0x00:
diff --git a/plugins/gpulib/gpu.h b/plugins/gpulib/gpu.h

index 371706a..6073fbf 100644 (file)
--- a/plugins/gpulib/gpu.h
+++ b/plugins/gpulib/gpu.h
@@ -162,8 +162,8 @@ void vout_set_config(const struct rearmed_cbs *config);
  // helpers
  #define VRAM_MEM_XY(vram_, x, y) &vram_[(y) * 1024 + (x)]
  
-int  do_vram_copy(uint16_t *vram, const uint32_t *ex_regs,
-       const uint32_t *params, int *cpu_cycles);
+int  do_vram_copy_pre(struct psx_gpu *gpu, const uint32_t *params, int *cpu_cycles);
+int  do_vram_copy(uint16_t *vram, const uint32_t *ex_regs, const uint32_t *params);
  
  int  prim_try_simplify_quad_t (void *simplified, const void *prim);
  int  prim_try_simplify_quad_gt(void *simplified, const void *prim);
diff --git a/plugins/gpulib/gpu_async.c b/plugins/gpulib/gpu_async.c

index 373663d..a67509b 100644 (file)
--- a/plugins/gpulib/gpu_async.c
+++ b/plugins/gpulib/gpu_async.c
@@ -264,7 +264,7 @@ int gpu_async_do_cmd_list(struct psx_gpu *gpu, const uint32_t *list_data, int li
      const uint32_t *list = list_data + pos;
      const int16_t *slist = (void *)list;
      const struct pos_drawarea *darea;
-    int rendered = 1, skip = 0;
+    int rendered = 1, skip = 0, cyc_tmp;
      int num_vertexes, x, y, w, h;
  
      cmd = LE32TOH(list[0]) >> 24;
@@ -352,6 +352,12 @@ int gpu_async_do_cmd_list(struct psx_gpu *gpu, const uint32_t *list_data, int li
        case 0x78 ... 0x7b:
        case 0x7C ... 0x7f: gput_sum(cyc_sum, cyc, gput_sprite(16, 16)); break;
        case 0x80 ... 0x9f: // vid -> vid
+        rendered = do_vram_copy_pre(gpu, list, &cyc_tmp);
+        gput_sum(cyc_sum, cyc, cyc_tmp);
+        if (!rendered) {
+          skip = 1;
+          break;
+        }
          x =   LE16TOH(slist[4]) & 0x3ff;
          y =   LE16TOH(slist[5]) & 0x1ff;
          w = ((LE16TOH(slist[6]) - 1) & 0x3ff) + 1;
@@ -363,7 +369,6 @@ int gpu_async_do_cmd_list(struct psx_gpu *gpu, const uint32_t *list_data, int li
            add_draw_area(agpu, pos_added, 1, x, y, x + w, y + h);
            add_draw_area_e(agpu, pos_added + 1, 1, gpu->ex_regs);
          }
-        gput_sum(cyc_sum, cyc, gput_copy(w, h));
          break;
        case 0xa0 ... 0xbf: // sys -> vid
        case 0xc0 ... 0xdf: // vid -> sys
@@ -516,7 +521,7 @@ static STRHEAD_RET_TYPE gpu_async_thread(void *unused)
        const void *list = agpu->cmd_buffer + pos + done;
        switch (cmd) {
          case 0x80 ... 0x9f:
-          done += do_vram_copy(gpup->vram, agpu->ex_regs, list, &cycles_dummy);
+          done += do_vram_copy(gpup->vram, agpu->ex_regs, list);
            break;
          case FAKECMD_SCREEN_CHANGE:
            done += do_notify_screen_change(gpup, list);
author	notaz <notasas@gmail.com>
	Sat, 11 Apr 2026 23:38:25 +0000 (02:38 +0300)
committer	notaz <notasas@gmail.com>
	Sat, 11 Apr 2026 23:51:44 +0000 (02:51 +0300)
plugins/gpulib/gpu.c		patch \| blob \| blame \| history
plugins/gpulib/gpu.h		patch \| blob \| blame \| history
plugins/gpulib/gpu_async.c		patch \| blob \| blame \| history