From cf63d2c52df50c60e7513ccb47b0ee64a6b8ec02 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 26 Jan 2026 02:55:42 +0200 Subject: [PATCH] gpulib: attempt to improve some frameskip logic --- plugins/gpulib/gpu.c | 84 +++++++++++++++++++++++--------------- plugins/gpulib/gpu.h | 1 + plugins/gpulib/gpu_async.c | 9 ++-- 3 files changed, 59 insertions(+), 35 deletions(-) diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index adba0e41..50f4f9d3 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -64,7 +64,6 @@ static noinline void do_cmd_reset(struct psx_gpu *gpu) sync_ecmds_status_bits(gpu); } gpu->cmd_len = 0; - sync_renderer(gpu); if (unlikely(gpu->dma.h > 0)) finish_vram_transfer(gpu, gpu->dma_start.is_read, 0); @@ -76,6 +75,7 @@ static noinline void do_reset(struct psx_gpu *gpu) unsigned int i; do_cmd_reset(gpu); + sync_renderer(gpu); memset(gpu->regs, 0, sizeof(gpu->regs)); for (i = 0; i < sizeof(gpu->ex_regs) / sizeof(gpu->ex_regs[0]); i++) @@ -189,6 +189,24 @@ static noinline void update_height(struct psx_gpu *gpu) // gpu->screen.y1, gpu->screen.y2, y, sh, vres); } +static noinline void frameskip_on_no_skip(struct psx_gpu *gpu) +{ + if (gpu->frameskip.ecmds_dirty_renderer) { + gpu->frameskip.ecmds_dirty_renderer = 0; + sync_renderer_ecmds(gpu); + } + if (gpu->frameskip.pending_fill[0] != 0) { + int dummy = 0; + if (gpu_async_enabled(gpu)) + (void)gpu_async_do_cmd_list(gpu, gpu->frameskip.pending_fill, 3, + &dummy, &dummy, &dummy); + else + renderer_do_cmd_list(gpu->frameskip.pending_fill, 3, gpu->ex_regs, + &dummy, &dummy, &dummy); + gpu->frameskip.pending_fill[0] = 0; + } +} + static noinline void decide_frameskip(struct psx_gpu *gpu) { *gpu->frameskip.dirty = 1; @@ -208,17 +226,6 @@ static noinline void decide_frameskip(struct psx_gpu *gpu) gpu->frameskip.active = 1; else gpu->frameskip.active = 0; - - if (!gpu->frameskip.active && gpu->frameskip.pending_fill[0] != 0) { - int dummy = 0; - if (gpu_async_enabled(gpu)) - (void)gpu_async_do_cmd_list(gpu, gpu->frameskip.pending_fill, 3, - &dummy, &dummy, &dummy); - else - renderer_do_cmd_list(gpu->frameskip.pending_fill, 3, gpu->ex_regs, - &dummy, &dummy, &dummy); - gpu->frameskip.pending_fill[0] = 0; - } } static noinline int decide_frameskip_allow(struct psx_gpu *gpu) @@ -347,7 +354,7 @@ void GPUwriteStatus(uint32_t data) { uint32_t cmd = data >> 24; uint32_t fb_dirty = 1; - int src_x, src_y; + int src_x, src_y, changed; if (cmd < ARRAY_SIZE(gpu.regs)) { if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data) @@ -378,7 +385,11 @@ void GPUwriteStatus(uint32_t data) break; case 0x05: src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff; - if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) { + changed = src_x != gpu.screen.src_x || src_y != gpu.screen.src_y; + // last_flip_frame check allows frameskip on dheight games + // that always set the same display area address + if (changed || gpu.frameskip.last_flip_frame != *gpu.state.frame_count) + { gpu.screen.src_x = src_x; gpu.screen.src_y = src_y; if (gpu_async_enabled(&gpu)) @@ -387,11 +398,12 @@ void GPUwriteStatus(uint32_t data) renderer_notify_screen_change(&gpu.screen); if (gpu.frameskip.set) { decide_frameskip_allow(&gpu); - if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) { + if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) decide_frameskip(&gpu); - gpu.frameskip.last_flip_frame = *gpu.state.frame_count; - } + if (!gpu.frameskip.active || !gpu.frameskip.allow) + frameskip_on_no_skip(&gpu); } + gpu.frameskip.last_flip_frame = *gpu.state.frame_count; } break; case 0x06: @@ -644,11 +656,10 @@ static noinline int do_cmd_list_skip(struct psx_gpu *gpu, uint32_t *data, int li { uint32_t cyc_sum = 0, cyc = *cpu_cycles_last; int cmd = 0, pos, len; - int skip = 1; gpu->frameskip.pending_fill[0] = 0; - for (pos = 0; pos < list_len && skip; pos += len) + for (pos = 0; pos < list_len; pos += len) { uint32_t *list = data + pos; const int16_t *slist = (void *)list; @@ -691,6 +702,7 @@ static noinline int do_cmd_list_skip(struct psx_gpu *gpu, uint32_t *data, int li do_texpage: gpu->ex_regs[1] &= ~0x1ff; gpu->ex_regs[1] |= (LE32TOH(list[4 + ((cmd >> 4) & 1)]) >> 16) & 0x1ff; + gpu->frameskip.ecmds_dirty_renderer = 1; break; case 0x40 ... 0x47: gput_sum(cyc_sum, cyc, gput_line(0)); @@ -748,11 +760,18 @@ static noinline int do_cmd_list_skip(struct psx_gpu *gpu, uint32_t *data, int li case 0xc0 ... 0xdf: // vid -> sys goto breakloop; case 0xe3: - skip = decide_frameskip_allow(gpu); - // fallthrough + gpu->ex_regs[cmd & 7] = LE32TOH(list[0]); + gpu->frameskip.ecmds_dirty_renderer = 1; + if (!decide_frameskip_allow(gpu)) { + frameskip_on_no_skip(gpu); + pos += len; + goto breakloop; + } + break; case 0xe0 ... 0xe2: case 0xe4 ... 0xe7: gpu->ex_regs[cmd & 7] = LE32TOH(list[0]); + gpu->frameskip.ecmds_dirty_renderer = 1; break; default: break; @@ -760,7 +779,6 @@ static noinline int do_cmd_list_skip(struct psx_gpu *gpu, uint32_t *data, int li } breakloop: - sync_renderer_ecmds(gpu); *cpu_cycles_sum_out += cyc_sum; *cpu_cycles_last = cyc; *last_cmd = cmd; @@ -829,12 +847,9 @@ static noinline int do_cmd_buffer(struct psx_gpu *gpu, uint32_t *data, int count continue; } - if (gpu->frameskip.active && - (gpu->frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0)) { - // 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip + if (gpu->frameskip.active && gpu->frameskip.allow) pos += do_cmd_list_skip(gpu, data + pos, count - pos, cycles_sum, cycles_last, &cmd); - } else if (gpu_async_enabled(gpu)) { pos += gpu_async_do_cmd_list(gpu, data + pos, count - pos, cycles_sum, cycles_last, &cmd); @@ -1065,6 +1080,14 @@ void GPUupdateLace(void) { int updated = 0; + if (gpu.frameskip.set && *gpu.state.frame_count - gpu.frameskip.last_flip_frame >= 10) { + gpu.frameskip.frame_ready = 1; + if (gpu.frameskip.active) { + gpu.frameskip.active = 0; + frameskip_on_no_skip(&gpu); + } + } + if (gpu.cmd_len > 0) { flush_cmd_buffer(&gpu); sync_ecmds_status_bits(&gpu); @@ -1082,16 +1105,13 @@ void GPUupdateLace(void) if (!gpu.state.fb_dirty) return; -#endif if (gpu.frameskip.set) { - if (!gpu.frameskip.frame_ready) { - if (*gpu.state.frame_count - gpu.frameskip.last_flip_frame < 9) - return; - gpu.frameskip.active = 0; - } + if (!gpu.frameskip.frame_ready) + return; gpu.frameskip.frame_ready = 0; } +#endif if (gpu_async_enabled(&gpu)) gpu_async_sync_scanout(&gpu); diff --git a/plugins/gpulib/gpu.h b/plugins/gpulib/gpu.h index 41288458..c98ce136 100644 --- a/plugins/gpulib/gpu.h +++ b/plugins/gpulib/gpu.h @@ -112,6 +112,7 @@ struct psx_gpu { uint32_t active:1; uint32_t allow:1; uint32_t frame_ready:1; + uint32_t ecmds_dirty_renderer:1; const int *advice; const int *force; int *dirty; diff --git a/plugins/gpulib/gpu_async.c b/plugins/gpulib/gpu_async.c index 20439469..a6c6f8b4 100644 --- a/plugins/gpulib/gpu_async.c +++ b/plugins/gpulib/gpu_async.c @@ -126,6 +126,7 @@ static void run_thread_nolock(struct psx_gpu_async *agpu) if (agpu->idle) { agpu->idle = 0; scond_signal(agpu->cond_use); + //agpu_log(&gpu, "%u/%u kick\n", RDPOS(agpu->pos_used), agpu->pos_added); } } @@ -256,7 +257,7 @@ int gpu_async_do_cmd_list(struct psx_gpu *gpu, const uint32_t *list_data, int li h = LE16TOH(slist[5]) & 0x1ff; darea = &agpu->draw_areas[agpu->pos_area]; if (x < darea->x0 || x + w > darea->x1 || y < darea->y0 || y + h > darea->y1) { - // let the thread know about changes outside of drawing area + // let sync_scanout() know about changes outside of drawing area agpu_log(gpu, "agpu: fill %d,%d %dx%d vs area %d,%d %dx%d\n", x, y, w, h, darea->x0, darea->y0, darea->x1 - darea->x0, darea->y1 - darea->y0); add_draw_area(agpu, agpu->pos_added, 1, x, y, x + w, y + h); @@ -464,6 +465,7 @@ static STRHEAD_RET_TYPE gpu_async_thread(void *unused) assert(0); } agpu->idle = 1; + //agpu_log(&gpu, "%u/%u sleep\n", agpu->pos_used, RDPOS(agpu->pos_added)); scond_wait(agpu->cond_use, agpu->lock); continue; } @@ -683,8 +685,9 @@ void gpu_async_sync_scanout(struct psx_gpu *gpu) } if (c > 0) { i = (i + 1) & AGPU_AREAS_MASK; - agpu_log(gpu, "agpu: wait %d/%d\n", agpu->draw_areas[i].pos - agpu->pos_used, - agpu->pos_added - agpu->pos_used); + agpu_log(gpu, "agpu: wait %d/%d @ %u/%u\n", + agpu->draw_areas[i].pos - RDPOS(agpu->pos_used), agpu->pos_added - + RDPOS(agpu->pos_used), RDPOS(agpu->pos_used), agpu->pos_added); slock_lock(agpu->lock); if (!agpu->idle) { assert(agpu->wait_mode == waitmode_none); -- 2.47.3