From ec663f4d67ae68ef4bafef40d9e2e99c386edb2a Mon Sep 17 00:00:00 2001
From: notaz
Date: Sun, 12 Apr 2026 02:38:25 +0300
Subject: [PATCH] gpulib: fix missing updates on vram copy

notaz/pcsx_rearmed#413
---
Reviewer note (commentary area, not part of the commit): the line breaks of
this patch were restored from the hunk headers and the diffstat, which all
add up. Leading indentation and runs of spaces inside lines could not be
recovered and are a best-effort reconstruction, so they may differ from the
upstream commit. Apply with `git apply --ignore-whitespace` or `patch -l`,
or re-export the commit from upstream. TODO confirm against the original.

 plugins/gpulib/gpu.c       | 72 ++++++++++++++++++++++++--------------
 plugins/gpulib/gpu.h       |  4 +--
 plugins/gpulib/gpu_async.c | 11 ++++--
 3 files changed, 55 insertions(+), 32 deletions(-)

diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c
index 0e967d6e..b3470461 100644
--- a/plugins/gpulib/gpu.c
+++ b/plugins/gpulib/gpu.c
@@ -239,18 +239,29 @@ static noinline void decide_frameskip(struct psx_gpu *gpu, uint32_t flip_delay)
     gpu->frameskip.active = 0;
 }
 
+static int check_screen_intersect(struct psx_gpu *gpu, int x, int y, int w, int h)
+{
+  int32_t screen_r = gpu->screen.src_x + gpu->screen.w;
+  int32_t screen_b = gpu->screen.src_y + gpu->screen.h;
+  int32_t dst_r = x + w, dst_b = y + h;
+  int32_t no_intersect;
+  no_intersect = screen_r - x - 1;
+  no_intersect |= screen_b - y - 1;
+  no_intersect |= dst_r - gpu->screen.src_x - 1;
+  no_intersect |= dst_b - gpu->screen.src_y - 1;
+  no_intersect >>= 31;
+  return !no_intersect;
+}
+
 static noinline void check_draw_to_display(struct psx_gpu *gpu)
 {
   uint32_t cmd_e3 = gpu->ex_regs[3];
-  uint32_t x1 = cmd_e3 & 0x3ff, y1 = (cmd_e3 >> 10) & 0x3ff;
-  uint32_t x2 = gpu->screen.src_x, y2 = gpu->screen.src_y;
-  uint32_t w = gpu->screen.w, h = gpu->screen.h;
-  uint32_t no_intersect =
-    x1 + w <= x2 || x2 + w <= x1 || y1 + h <= y2 || y2 + h <= y1;
-  gpu->state.draw_display_intersect = !no_intersect;
+  uint32_t x1 = cmd_e3 & 0x3ff, y1 = (cmd_e3 >> 10) & 0x3ff;
+  int intersect = check_screen_intersect(gpu, x1, y1, gpu->screen.w, gpu->screen.h);
+  gpu->state.draw_display_intersect = intersect;
   // no frameskip if it decides to draw to display area,
   // but not for interlace since it'll most likely always do that
-  gpu->frameskip.allow = no_intersect || (gpu->status & PSX_GPU_STATUS_INTERLACE);
+  gpu->frameskip.allow = !intersect || (gpu->status & PSX_GPU_STATUS_INTERLACE);
 }
 
 static void flush_cmd_buffer(struct psx_gpu *gpu);
@@ -598,20 +609,12 @@ static void finish_vram_transfer(struct psx_gpu *gpu, int is_read, int is_async)
   if (is_read)
     gpu->status &= ~PSX_GPU_STATUS_IMG;
   else {
-    int32_t screen_r = gpu->screen.src_x + gpu->screen.hres;
-    int32_t screen_b = gpu->screen.src_y + gpu->screen.vres;
-    int32_t dma_r = gpu->dma_start.x + gpu->dma_start.w;
-    int32_t dma_b = gpu->dma_start.y + gpu->dma_start.h;
-    int32_t not_dirty;
-    not_dirty = screen_r - gpu->dma_start.x - 1;
-    not_dirty |= screen_b - gpu->dma_start.y - 1;
-    not_dirty |= dma_r - gpu->screen.src_x - 1;
-    not_dirty |= dma_b - gpu->screen.src_y - 1;
-    not_dirty >>= 31;
+    int intersect = check_screen_intersect(gpu, gpu->dma_start.x, gpu->dma_start.y,
+        gpu->dma_start.w, gpu->dma_start.h);
     log_io(gpu, "dma %3d,%3d %dx%d scr %3d,%3d %3dx%3d -> dirty %d\n",
       gpu->dma_start.x, gpu->dma_start.y, gpu->dma_start.w, gpu->dma_start.h,
-      gpu->screen.src_x, gpu->screen.src_y, gpu->screen.hres, gpu->screen.vres, !not_dirty);
-    gpu->state.fb_dirty_display_area |= !not_dirty;
+      gpu->screen.src_x, gpu->screen.src_y, gpu->screen.hres, gpu->screen.vres, intersect);
+    gpu->state.fb_dirty_display_area |= intersect;
     gpu->state.fb_dirty = 1;
     if (!is_async)
       renderer_update_caches(gpu->dma_start.x, gpu->dma_start.y,
@@ -621,8 +624,27 @@ static void finish_vram_transfer(struct psx_gpu *gpu, int is_read, int is_async)
     gpu->gpu_state_change(PGS_VRAM_TRANSFER_END, 0);
 }
 
-int do_vram_copy(uint16_t *vram, const uint32_t *ex_regs,
- const uint32_t *params, int *cpu_cycles)
+int do_vram_copy_pre(struct psx_gpu *gpu, const uint32_t *params, int *cpu_cycles)
+{
+  const uint32_t sx = LE32TOH(params[1]) & 0x3FF;
+  const uint32_t sy = (LE32TOH(params[1]) >> 16) & 0x1FF;
+  const uint32_t dx = LE32TOH(params[2]) & 0x3FF;
+  const uint32_t dy = (LE32TOH(params[2]) >> 16) & 0x1FF;
+  uint32_t w = ((LE32TOH(params[3]) - 1) & 0x3FF) + 1;
+  uint32_t h = (((LE32TOH(params[3]) >> 16) - 1) & 0x1FF) + 1;
+  int intersect;
+
+  *cpu_cycles = gput_copy(w, h);
+  if (sx == dx && sy == dy && !(gpu->ex_regs[6] & 0x8000))
+    return 0;
+
+  intersect = check_screen_intersect(gpu, dx, dy, w, h);
+  gpu->state.fb_dirty_display_area |= intersect;
+  gpu->state.fb_dirty = 1;
+  return 1;
+}
+
+int do_vram_copy(uint16_t *vram, const uint32_t *ex_regs, const uint32_t *params)
 {
   const uint32_t sx = LE32TOH(params[1]) & 0x3FF;
   const uint32_t sy = (LE32TOH(params[1]) >> 16) & 0x1FF;
@@ -634,10 +656,6 @@ int do_vram_copy(uint16_t *vram, const uint32_t *ex_regs,
   uint16_t lbuf[128];
   uint32_t x, y;
 
-  *cpu_cycles += gput_copy(w, h);
-  if (sx == dx && sy == dy && msb == 0)
-    return 4;
-
   renderer_flush_queues();
 
   if (unlikely((sx < dx && dx < sx + w) || sx + w > 1024 || dx + w > 1024 || msb))
@@ -855,8 +873,8 @@ static noinline int do_cmd_buffer(struct psx_gpu *gpu, uint32_t *data, int count
           break;
         *cycles_sum += *cycles_last;
         *cycles_last = 0;
-        do_vram_copy(gpu->vram, gpu->ex_regs, data + pos, cycles_last);
-        vram_dirty = 1;
+        if (do_vram_copy_pre(gpu, data + pos, cycles_last))
+          do_vram_copy(gpu->vram, gpu->ex_regs, data + pos);
         pos += 4;
         continue;
       case 0x00:
diff --git a/plugins/gpulib/gpu.h b/plugins/gpulib/gpu.h
index 371706ab..6073fbf0 100644
--- a/plugins/gpulib/gpu.h
+++ b/plugins/gpulib/gpu.h
@@ -162,8 +162,8 @@ void vout_set_config(const struct rearmed_cbs *config);
 // helpers
 #define VRAM_MEM_XY(vram_, x, y) &vram_[(y) * 1024 + (x)]
 
-int do_vram_copy(uint16_t *vram, const uint32_t *ex_regs,
- const uint32_t *params, int *cpu_cycles);
+int do_vram_copy_pre(struct psx_gpu *gpu, const uint32_t *params, int *cpu_cycles);
+int do_vram_copy(uint16_t *vram, const uint32_t *ex_regs, const uint32_t *params);
 
 int prim_try_simplify_quad_t (void *simplified, const void *prim);
 int prim_try_simplify_quad_gt(void *simplified, const void *prim);
diff --git a/plugins/gpulib/gpu_async.c b/plugins/gpulib/gpu_async.c
index 373663d4..a67509b3 100644
--- a/plugins/gpulib/gpu_async.c
+++ b/plugins/gpulib/gpu_async.c
@@ -264,7 +264,7 @@ int gpu_async_do_cmd_list(struct psx_gpu *gpu, const uint32_t *list_data, int li
     const uint32_t *list = list_data + pos;
     const int16_t *slist = (void *)list;
     const struct pos_drawarea *darea;
-    int rendered = 1, skip = 0;
+    int rendered = 1, skip = 0, cyc_tmp;
     int num_vertexes, x, y, w, h;
 
     cmd = LE32TOH(list[0]) >> 24;
@@ -352,6 +352,12 @@ int gpu_async_do_cmd_list(struct psx_gpu *gpu, const uint32_t *list_data, int li
       case 0x78 ... 0x7b:
       case 0x7C ... 0x7f: gput_sum(cyc_sum, cyc, gput_sprite(16, 16)); break;
       case 0x80 ... 0x9f: // vid -> vid
+        rendered = do_vram_copy_pre(gpu, list, &cyc_tmp);
+        gput_sum(cyc_sum, cyc, cyc_tmp);
+        if (!rendered) {
+          skip = 1;
+          break;
+        }
         x = LE16TOH(slist[4]) & 0x3ff;
         y = LE16TOH(slist[5]) & 0x1ff;
         w = ((LE16TOH(slist[6]) - 1) & 0x3ff) + 1;
@@ -363,7 +369,6 @@ int gpu_async_do_cmd_list(struct psx_gpu *gpu, const uint32_t *list_data, int li
           add_draw_area(agpu, pos_added, 1, x, y, x + w, y + h);
           add_draw_area_e(agpu, pos_added + 1, 1, gpu->ex_regs);
         }
-        gput_sum(cyc_sum, cyc, gput_copy(w, h));
         break;
       case 0xa0 ... 0xbf: // sys -> vid
       case 0xc0 ... 0xdf: // vid -> sys
@@ -516,7 +521,7 @@ static STRHEAD_RET_TYPE gpu_async_thread(void *unused)
       const void *list = agpu->cmd_buffer + pos + done;
       switch (cmd) {
       case 0x80 ... 0x9f:
-        done += do_vram_copy(gpup->vram, agpu->ex_regs, list, &cycles_dummy);
+        done += do_vram_copy(gpup->vram, agpu->ex_regs, list);
         break;
       case FAKECMD_SCREEN_CHANGE:
         done += do_notify_screen_change(gpup, list);
-- 
2.47.3