From 0b4038f8edd327a3a9a2fbdefbc25ece921bc2ab Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 20 Aug 2023 22:38:03 +0300 Subject: [PATCH] gpu_neon: rework buffer selection to fix MGS codec and maybe more --- plugins/dfxvideo/gpulib_if.c | 6 +- plugins/gpu-gles/gpulib_if.c | 6 +- plugins/gpu_neon/psx_gpu/psx_gpu.c | 2 +- plugins/gpu_neon/psx_gpu/psx_gpu.h | 14 +- plugins/gpu_neon/psx_gpu/psx_gpu_4x.c | 6 +- plugins/gpu_neon/psx_gpu/psx_gpu_parse.c | 171 ++++++++++++++--------- plugins/gpu_neon/psx_gpu_if.c | 108 ++++++++++---- plugins/gpu_senquack/gpulib_if.cpp | 6 +- plugins/gpu_unai/gpulib_if.cpp | 6 +- plugins/gpulib/gpu.c | 18 +-- plugins/gpulib/gpu.h | 10 +- plugins/gpulib/vout_pl.c | 3 + 12 files changed, 247 insertions(+), 109 deletions(-) diff --git a/plugins/dfxvideo/gpulib_if.c b/plugins/dfxvideo/gpulib_if.c index 3a41cd7c..ba7f16a0 100644 --- a/plugins/dfxvideo/gpulib_if.c +++ b/plugins/dfxvideo/gpulib_if.c @@ -299,6 +299,10 @@ void renderer_notify_res_change(void) { } +void renderer_notify_scanout_x_change(int x, int w) +{ +} + extern const unsigned char cmd_lengths[256]; int do_cmd_list(uint32_t *list, int list_len, int *last_cmd) @@ -414,7 +418,7 @@ void renderer_sync_ecmds(uint32_t *ecmds_) cmdSTP((unsigned char *)&ecmds[6]); } -void renderer_update_caches(int x, int y, int w, int h) +void renderer_update_caches(int x, int y, int w, int h, int state_changed) { } diff --git a/plugins/gpu-gles/gpulib_if.c b/plugins/gpu-gles/gpulib_if.c index 69285daa..b592175b 100644 --- a/plugins/gpu-gles/gpulib_if.c +++ b/plugins/gpu-gles/gpulib_if.c @@ -514,6 +514,10 @@ void renderer_notify_res_change(void) { } +void renderer_notify_scanout_x_change(int x, int w) +{ +} + extern const unsigned char cmd_lengths[256]; // XXX: mostly dupe code from soft peops @@ -622,7 +626,7 @@ void renderer_sync_ecmds(uint32_t *ecmds) cmdSTP((unsigned char *)&ecmds[6]); } -void renderer_update_caches(int x, int y, int w, int h) +void renderer_update_caches(int x, int y, int w, int h, int state_changed) { VRAMWrite.x = x; VRAMWrite.y = y; diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index 85cf89fa..e252d04e 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -4963,7 +4963,7 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram) psx_gpu->primitive_type = PRIMITIVE_TYPE_UNKNOWN; - psx_gpu->enhancement_x_threshold = 256; + psx_gpu->saved_hres = 256; } u64 get_us(void) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.h b/plugins/gpu_neon/psx_gpu/psx_gpu.h index bdd9caec..0ef957f2 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.h +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.h @@ -185,18 +185,22 @@ typedef struct u32 *reciprocal_table_ptr; // enhancement stuff - u16 *enhancement_buf_ptr; - u16 *enhancement_current_buf_ptr; - u32 enhancement_x_threshold; + u16 *enhancement_buf_ptr; // main alloc + u16 *enhancement_current_buf_ptr; // offset into above, 4 bufs + u32 saved_hres; s16 saved_viewport_start_x; s16 saved_viewport_start_y; s16 saved_viewport_end_x; s16 saved_viewport_end_y; - u8 enhancement_buf_by_x16[64]; + u8 enhancement_buf_by_x16[64]; // 0-3 specifying which buf + u16 enhancement_buf_start[4]; // x pos where buf[n] begins + + u16 enhancement_scanout_x[4]; + u16 enhancement_scanout_select; // Align up to 64 byte boundary to keep the upcoming buffers cache line // aligned, also make reachable with single immediate addition - u8 reserved_a[160]; + u8 reserved_a[142]; // 8KB block_struct blocks[MAX_BLOCKS_PER_ROW]; diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c b/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c index 942b3d30..d7ec3409 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c @@ -1,6 +1,10 @@ +#define select_enhancement_buf_index(psx_gpu, x) \ + ((psx_gpu)->enhancement_buf_by_x16[(u32)(x) / \ + (1024u / sizeof((psx_gpu)->enhancement_buf_by_x16))]) + #define select_enhancement_buf_ptr(psx_gpu, x) \ ((psx_gpu)->enhancement_buf_ptr + \ - ((psx_gpu)->enhancement_buf_by_x16[(x) / 16] << 20)) + (select_enhancement_buf_index(psx_gpu, x) << 20)) #if !defined(NEON_BUILD) || defined(SIMD_BUILD) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index 37622580..32c32fdb 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -250,10 +250,9 @@ static void do_fill(psx_gpu_struct *psx_gpu, u32 x, u32 y, #define SET_Ex(r, v) #endif -vertex_struct vertexes[4] __attribute__((aligned(32))); - u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) { + vertex_struct vertexes[4] __attribute__((aligned(16))) = {}; u32 current_command = 0, command_length; u32 *list_start = list; @@ -790,26 +789,61 @@ breakloop: #define ENH_BUF_TABLE_STEP (1024 / sizeof(psx_gpu->enhancement_buf_by_x16)) +static int is_new_scanout(psx_gpu_struct *psx_gpu, int x) +{ + int i, scanout_x; + for (i = 0; i < ARRAY_SIZE(psx_gpu->enhancement_scanout_x); i++) + { + scanout_x = psx_gpu->enhancement_scanout_x[i]; + if (x <= scanout_x && scanout_x < x + ENH_BUF_TABLE_STEP) + { + if (x != scanout_x) + log_anomaly("unaligned scanout x: %d,%d\n", scanout_x, x); + return 1; + } + } + return 0; +} + static void update_enhancement_buf_table_from_hres(psx_gpu_struct *psx_gpu) { - u32 b, x, s; + u32 b, x; b = 0; - s = psx_gpu->enhancement_x_threshold; - for (x = 0; x < sizeof(psx_gpu->enhancement_buf_by_x16); x++) + psx_gpu->enhancement_buf_by_x16[0] = b; + psx_gpu->enhancement_buf_start[0] = 0; + for (x = 1; x < sizeof(psx_gpu->enhancement_buf_by_x16); x++) { - if (b < 3 && x * ENH_BUF_TABLE_STEP >= s) - { - s += psx_gpu->enhancement_x_threshold; + if (b < 3 && is_new_scanout(psx_gpu, x * ENH_BUF_TABLE_STEP)) { b++; + psx_gpu->enhancement_buf_start[b] = x * ENH_BUF_TABLE_STEP; } + psx_gpu->enhancement_buf_by_x16[x] = b; } +#if 0 + printf("buf_by_x16:\n"); + for (b = 0; b < 3; b++) { + int first = -1, count = 0; + for (x = 0; x < sizeof(psx_gpu->enhancement_buf_by_x16); x++) { + if (psx_gpu->enhancement_buf_by_x16[x] == b) { + if (first < 0) first = x; + count++; + } + } + if (count) { + assert(first * ENH_BUF_TABLE_STEP == psx_gpu->enhancement_buf_start[b]); + printf("%d: %3zd-%zd\n", b, first * ENH_BUF_TABLE_STEP, + (first + count) * ENH_BUF_TABLE_STEP); + } + } +#endif } static void update_enhancement_buf_table_from_x(psx_gpu_struct *psx_gpu, u32 x0, u32 len) { +#if 0 u32 x, b; for (x = x0, b = 0; x >= len; b++) @@ -819,6 +853,7 @@ static void update_enhancement_buf_table_from_x(psx_gpu_struct *psx_gpu, memset(psx_gpu->enhancement_buf_by_x16 + x0 / ENH_BUF_TABLE_STEP, b, (len + ENH_BUF_TABLE_STEP - 1) / ENH_BUF_TABLE_STEP); +#endif } #define select_enhancement_buf(psx_gpu) \ @@ -844,30 +879,27 @@ static void update_enhancement_buf_table_from_x(psx_gpu_struct *psx_gpu, } #define shift_vertices3(v) { \ - v[0]->x *= 2; \ - v[0]->y *= 2; \ - v[1]->x *= 2; \ - v[1]->y *= 2; \ - v[2]->x *= 2; \ - v[2]->y *= 2; \ + v[0]->x <<= 1; \ + v[0]->y <<= 1; \ + v[1]->x <<= 1; \ + v[1]->y <<= 1; \ + v[2]->x <<= 1; \ + v[2]->y <<= 1; \ } #define unshift_vertices3(v) { \ - v[0]->x /= 2; \ - v[0]->y /= 2; \ - v[1]->x /= 2; \ - v[1]->y /= 2; \ - v[2]->x /= 2; \ - v[2]->y /= 2; \ + v[0]->x >>= 1; \ + v[0]->y >>= 1; \ + v[1]->x >>= 1; \ + v[1]->y >>= 1; \ + v[2]->x >>= 1; \ + v[2]->y >>= 1; \ } #define shift_triangle_area() \ psx_gpu->triangle_area *= 4 -extern void scale2x_tiles8(void *dst, const void *src, int w8, int h); - #ifndef NEON_BUILD -// TODO? void scale2x_tiles8(void *dst, const void *src, int w8, int h) { uint16_t* d = (uint16_t*)dst; @@ -938,6 +970,16 @@ void scale2x_tiles8(void *dst, const void *src, int w8, int h) static int disable_main_render; +static int check_enhanced_range(psx_gpu_struct *psx_gpu, int x, int x_end) +{ + // simple reject to avoid oveflowing the 1024 width + // (assume some offscreen render-to-texture thing) + if (x >= (int)(psx_gpu->saved_viewport_start_x + 512)) + return 0; + + return 1; +} + static void do_triangle_enhanced(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 current_command) { @@ -949,37 +991,22 @@ static void do_triangle_enhanced(psx_gpu_struct *psx_gpu, if (!disable_main_render) render_triangle_p(psx_gpu, vertex_ptrs, current_command); + if (!check_enhanced_range(psx_gpu, vertex_ptrs[0]->x, vertex_ptrs[2]->x)) + return; + enhancement_enable(); shift_vertices3(vertex_ptrs); shift_triangle_area(); render_triangle_p(psx_gpu, vertex_ptrs, current_command); + unshift_vertices3(vertex_ptrs); } static void do_quad_enhanced(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 current_command) { - vertex_struct *vertex_ptrs[3]; - - if (prepare_triangle(psx_gpu, vertexes, vertex_ptrs)) { - if (!disable_main_render) - render_triangle_p(psx_gpu, vertex_ptrs, current_command); - - enhancement_enable(); - shift_vertices3(vertex_ptrs); - shift_triangle_area(); - render_triangle_p(psx_gpu, vertex_ptrs, current_command); - unshift_vertices3(vertex_ptrs); - } + do_triangle_enhanced(psx_gpu, vertexes, current_command); enhancement_disable(); - if (prepare_triangle(psx_gpu, &vertexes[1], vertex_ptrs)) { - if (!disable_main_render) - render_triangle_p(psx_gpu, vertex_ptrs, current_command); - - enhancement_enable(); - shift_vertices3(vertex_ptrs); - shift_triangle_area(); - render_triangle_p(psx_gpu, vertex_ptrs, current_command); - } + do_triangle_enhanced(psx_gpu, &vertexes[1], current_command); } #if 0 @@ -1062,6 +1089,7 @@ static void do_sprite_enhanced(psx_gpu_struct *psx_gpu, int x, int y, u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) { + vertex_struct vertexes[4] __attribute__((aligned(16))) = {}; u32 current_command = 0, command_length; u32 *list_start = list; @@ -1097,12 +1125,24 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 width = list_s16[4] & 0x3FF; u32 height = list_s16[5] & 0x1FF; u32 color = list[0] & 0xFFFFFF; + u32 i1, i2; x &= ~0xF; width = ((width + 0xF) & ~0xF); + if (width == 0 || height == 0) + break; do_fill(psx_gpu, x, y, width, height, color); + i1 = select_enhancement_buf_index(psx_gpu, x); + i2 = select_enhancement_buf_index(psx_gpu, x + width - 1); + if (i1 != i2) { + sync_enhancement_buffers(x, y, width, height); + break; + } + if (x >= psx_gpu->enhancement_buf_start[i1] + psx_gpu->saved_hres) + break; + psx_gpu->vram_out_ptr = select_enhancement_buf_ptr(psx_gpu, x); x *= 2; y *= 2; @@ -1353,7 +1393,9 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 height = list_s16[5] & 0x1FF; render_sprite(psx_gpu, x, y, 0, 0, width, height, current_command, list[0]); - do_sprite_enhanced(psx_gpu, x, y, 0, 0, width, height, list[0]); + + if (check_enhanced_range(psx_gpu, x, x + width)) + do_sprite_enhanced(psx_gpu, x, y, 0, 0, width, height, list[0]); break; } @@ -1370,7 +1412,9 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_sprite(psx_gpu, x, y, u, v, width, height, current_command, list[0]); - do_sprite_enhanced(psx_gpu, x, y, u, v, width, height, list[0]); + + if (check_enhanced_range(psx_gpu, x, x + width)) + do_sprite_enhanced(psx_gpu, x, y, u, v, width, height, list[0]); break; } @@ -1383,7 +1427,9 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); render_sprite(psx_gpu, x, y, 0, 0, 1, 1, current_command, list[0]); - do_sprite_enhanced(psx_gpu, x, y, 0, 0, 1, 1, list[0]); + + if (check_enhanced_range(psx_gpu, x, x + 1)) + do_sprite_enhanced(psx_gpu, x, y, 0, 0, 1, 1, list[0]); break; } @@ -1396,7 +1442,9 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); render_sprite(psx_gpu, x, y, 0, 0, 8, 8, current_command, list[0]); - do_sprite_enhanced(psx_gpu, x, y, 0, 0, 8, 8, list[0]); + + if (check_enhanced_range(psx_gpu, x, x + 8)) + do_sprite_enhanced(psx_gpu, x, y, 0, 0, 8, 8, list[0]); break; } @@ -1414,7 +1462,9 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, render_sprite(psx_gpu, x, y, u, v, 8, 8, current_command, list[0]); - do_sprite_enhanced(psx_gpu, x, y, u, v, 8, 8, list[0]); + + if (check_enhanced_range(psx_gpu, x, x + 8)) + do_sprite_enhanced(psx_gpu, x, y, u, v, 8, 8, list[0]); break; } @@ -1427,7 +1477,9 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); render_sprite(psx_gpu, x, y, 0, 0, 16, 16, current_command, list[0]); - do_sprite_enhanced(psx_gpu, x, y, 0, 0, 16, 16, list[0]); + + if (check_enhanced_range(psx_gpu, x, x + 16)) + do_sprite_enhanced(psx_gpu, x, y, 0, 0, 16, 16, list[0]); break; } @@ -1444,7 +1496,9 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, set_clut(psx_gpu, list_s16[5]); render_sprite(psx_gpu, x, y, u, v, 16, 16, current_command, list[0]); - do_sprite_enhanced(psx_gpu, x, y, u, v, 16, 16, list[0]); + + if (check_enhanced_range(psx_gpu, x, x + 16)) + do_sprite_enhanced(psx_gpu, x, y, u, v, 16, 16, list[0]); break; } @@ -1456,21 +1510,12 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 dy = list_s16[5] & 0x1FF; u32 w = ((list_s16[6] - 1) & 0x3FF) + 1; u32 h = ((list_s16[7] - 1) & 0x1FF) + 1; - u16 *buf; if (sx == dx && sy == dy && psx_gpu->mask_msb == 0) break; render_block_move(psx_gpu, sx, sy, dx, dy, w, h); - if (dy + h > 512) - h = 512 - dy; - sx = sx & ~7; // FIXME? - dx = dx * 2 & ~7; - dy *= 2; - w = (w + 7) / 8; - buf = select_enhancement_buf_ptr(psx_gpu, dx / 2); - scale2x_tiles8(buf + dy * 1024 + dx, - psx_gpu->vram_ptr + sy * 1024 + sx, w, h); + sync_enhancement_buffers(dx, dy, w, h); break; } @@ -1542,7 +1587,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, psx_gpu->saved_viewport_start_y = viewport_start_y; w = (u32)psx_gpu->viewport_end_x - (u32)viewport_start_x + 1; - d = psx_gpu->enhancement_x_threshold - w; + d = psx_gpu->saved_hres - w; if(-16 <= d && d <= 16) { update_enhancement_buf_table_from_x(psx_gpu, @@ -1579,7 +1624,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, psx_gpu->saved_viewport_end_y = viewport_end_y; w = (u32)viewport_end_x - (u32)psx_gpu->viewport_start_x + 1; - d = psx_gpu->enhancement_x_threshold - w; + d = psx_gpu->saved_hres - w; if(-16 <= d && d <= 16) { update_enhancement_buf_table_from_x(psx_gpu, diff --git a/plugins/gpu_neon/psx_gpu_if.c b/plugins/gpu_neon/psx_gpu_if.c index 30faee25..69a2a1bb 100644 --- a/plugins/gpu_neon/psx_gpu_if.c +++ b/plugins/gpu_neon/psx_gpu_if.c @@ -9,8 +9,11 @@ */ #include +#include #include +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + extern const unsigned char cmd_lengths[256]; #define command_lengths cmd_lengths @@ -21,9 +24,12 @@ static int initialized; #define SET_Ex(r, v) \ ex_regs[r] = v +static __attribute__((noinline)) void +sync_enhancement_buffers(int x, int y, int w, int h); + +#include "../gpulib/gpu.h" #include "psx_gpu/psx_gpu.c" #include "psx_gpu/psx_gpu_parse.c" -#include "../gpulib/gpu.h" static psx_gpu_struct egpu __attribute__((aligned(256))); @@ -110,8 +116,12 @@ static __attribute__((noinline)) void sync_enhancement_buffers(int x, int y, int w, int h) { const int step_x = 1024 / sizeof(egpu.enhancement_buf_by_x16); + int hres = egpu.saved_hres; + int x_buf, w1, s, fb_index; u16 *src, *dst; - int w1, fb_index; + + if (egpu.enhancement_buf_ptr == NULL) + return; w += x & (step_x - 1); x &= ~(step_x - 1); @@ -119,18 +129,29 @@ sync_enhancement_buffers(int x, int y, int w, int h) if (y + h > 512) h = 512 - y; + // find x_buf which is an offset into this enhancement_buf + fb_index = egpu.enhancement_buf_by_x16[x / step_x]; + x_buf = x - egpu.enhancement_buf_start[fb_index]; + while (w > 0) { fb_index = egpu.enhancement_buf_by_x16[x / step_x]; - for (w1 = 0; w > 0; w1++, w -= step_x) + for (w1 = 0; w > 0 && x_buf < hres; x_buf += step_x, w1++, w -= step_x) if (fb_index != egpu.enhancement_buf_by_x16[x / step_x + w1]) break; + // skip further unneeded data, if any + for (s = 0; w > 0; s++, w -= step_x) + if (fb_index != egpu.enhancement_buf_by_x16[x / step_x + w1 + s]) + break; - src = gpu.vram + y * 1024 + x; - dst = select_enhancement_buf_ptr(&egpu, x); - dst += (y * 1024 + x) * 2; - scale2x_tiles8(dst, src, w1 * step_x / 8, h); + if (w1 > 0) { + src = gpu.vram + y * 1024 + x; + dst = select_enhancement_buf_ptr(&egpu, x); + dst += (y * 1024 + x) * 2; + scale2x_tiles8(dst, src, w1 * step_x / 8, h); + } - x += w1 * step_x; + x += (w1 + s) * step_x; + x_buf = 0; } } @@ -139,11 +160,18 @@ void renderer_sync_ecmds(uint32_t *ecmds) gpu_parse(&egpu, ecmds + 1, 6 * 4, NULL); } -void renderer_update_caches(int x, int y, int w, int h) +void renderer_update_caches(int x, int y, int w, int h, int state_changed) { update_texture_cache_region(&egpu, x, y, x + w - 1, y + h - 1); - if (gpu.state.enhancement_active && !(gpu.status & PSX_GPU_STATUS_RGB24)) + + if (gpu.state.enhancement_active) { + if (state_changed) { + egpu.saved_hres = 0; + renderer_notify_res_change(); + return; + } sync_enhancement_buffers(x, y, w, h); + } } void renderer_flush_queues(void) @@ -162,11 +190,44 @@ void renderer_set_interlace(int enable, int is_odd) void renderer_notify_res_change(void) { - // note: must keep it multiple of 8 - if (egpu.enhancement_x_threshold != gpu.screen.hres) + renderer_notify_scanout_x_change(gpu.screen.src_x, gpu.screen.hres); +} + +void renderer_notify_scanout_x_change(int x, int w) +{ + int hres = (w + 15) & ~15; + int max_bufs = ARRAY_SIZE(egpu.enhancement_scanout_x); + int need_update = 0; + int i; + + if (!gpu.state.enhancement_active) + return; + + assert(!(max_bufs & (max_bufs - 1))); + if (egpu.saved_hres != hres) { + for (i = 0; i < max_bufs; i++) + egpu.enhancement_scanout_x[i] = x; + need_update = 1; + } + + if (egpu.enhancement_scanout_x[egpu.enhancement_scanout_select] != x) { - egpu.enhancement_x_threshold = gpu.screen.hres; + // maybe triple buffering? + for (i = 0; i < max_bufs; i++) + if (egpu.enhancement_scanout_x[i] == x) + break; + if (i == max_bufs) + need_update = 1; + + egpu.enhancement_scanout_x[egpu.enhancement_scanout_select] = x; + } + egpu.enhancement_scanout_select++; + egpu.enhancement_scanout_select &= max_bufs - 1; + if (need_update) + { + egpu.saved_hres = hres; update_enhancement_buf_table_from_hres(&egpu); + sync_enhancement_buffers(0, 0, 1024, 512); } } @@ -174,23 +235,18 @@ void renderer_notify_res_change(void) void renderer_set_config(const struct rearmed_cbs *cbs) { - static int enhancement_was_on; - - disable_main_render = cbs->gpu_neon.enhancement_no_main; - if (egpu.enhancement_buf_ptr != NULL && cbs->gpu_neon.enhancement_enable - && !enhancement_was_on) - { - sync_enhancement_buffers(0, 0, 1024, 512); - } - enhancement_was_on = cbs->gpu_neon.enhancement_enable; - if (!initialized) { initialize_psx_gpu(&egpu, gpu.vram); initialized = 1; } - - if (gpu.mmap != NULL && egpu.enhancement_buf_ptr == NULL) - map_enhancement_buffer(); if (cbs->pl_set_gpu_caps) cbs->pl_set_gpu_caps(GPU_CAP_SUPPORTS_2X); + + disable_main_render = cbs->gpu_neon.enhancement_no_main; + if (gpu.state.enhancement_enable) { + if (gpu.mmap != NULL && egpu.enhancement_buf_ptr == NULL) + map_enhancement_buffer(); + } } + +// vim:ts=2:sw=2:expandtab diff --git a/plugins/gpu_senquack/gpulib_if.cpp b/plugins/gpu_senquack/gpulib_if.cpp index 5efc7d9d..0bc63c6d 100644 --- a/plugins/gpu_senquack/gpulib_if.cpp +++ b/plugins/gpu_senquack/gpulib_if.cpp @@ -144,6 +144,10 @@ void renderer_notify_res_change(void) */ } +void renderer_notify_scanout_x_change(int x, int w) +{ +} + #ifdef USE_GPULIB // Handles GP0 draw settings commands 0xE1...0xE6 static void gpuGP0Cmd_0xEx(gpu_senquack_t &gpu_senquack, u32 cmd_word) @@ -613,7 +617,7 @@ void renderer_sync_ecmds(uint32_t *ecmds) do_cmd_list(&ecmds[1], 6, &dummy); } -void renderer_update_caches(int x, int y, int w, int h) +void renderer_update_caches(int x, int y, int w, int h, int state_changed) { } diff --git a/plugins/gpu_unai/gpulib_if.cpp b/plugins/gpu_unai/gpulib_if.cpp index 45eac41d..0064aaa3 100644 --- a/plugins/gpu_unai/gpulib_if.cpp +++ b/plugins/gpu_unai/gpulib_if.cpp @@ -163,6 +163,10 @@ void renderer_notify_res_change(void) { } +void renderer_notify_scanout_x_change(int x, int w) +{ +} + extern const unsigned char cmd_lengths[256]; int do_cmd_list(unsigned int *list, int list_len, int *last_cmd) @@ -520,7 +524,7 @@ void renderer_sync_ecmds(uint32_t *ecmds) do_cmd_list(&ecmds[1], 6, &dummy); } -void renderer_update_caches(int x, int y, int w, int h) +void renderer_update_caches(int x, int y, int w, int h, int state_changed) { } diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index 931583f3..b23f8a88 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -24,13 +24,8 @@ #define noinline #endif -#define gpu_log(fmt, ...) \ - printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__) - //#define log_io gpu_log #define log_io(...) -//#define log_anomaly gpu_log -#define log_anomaly(...) struct psx_gpu gpu; @@ -63,6 +58,7 @@ static noinline void do_reset(void) gpu.screen.hres = gpu.screen.w = 256; gpu.screen.vres = gpu.screen.h = 240; gpu.screen.x = gpu.screen.y = 0; + renderer_notify_res_change(); } static noinline void update_width(void) @@ -225,9 +221,11 @@ long GPUinit(void) ret = vout_init(); ret |= renderer_init(); + memset(&gpu.state, 0, sizeof(gpu.state)); + memset(&gpu.frameskip, 0, sizeof(gpu.frameskip)); + gpu.zero = 0; gpu.state.frame_count = &gpu.zero; gpu.state.hcnt = &gpu.zero; - gpu.frameskip.active = 0; gpu.cmd_len = 0; do_reset(); @@ -287,6 +285,7 @@ void GPUwriteStatus(uint32_t data) case 0x05: gpu.screen.src_x = data & 0x3ff; gpu.screen.src_y = (data >> 10) & 0x1ff; + renderer_notify_scanout_x_change(gpu.screen.src_x, gpu.screen.hres); if (gpu.frameskip.set) { decide_frameskip_allow(gpu.ex_regs[3]); if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) { @@ -434,7 +433,7 @@ static void finish_vram_transfer(int is_read) gpu.status &= ~PSX_GPU_STATUS_IMG; else renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y, - gpu.dma_start.w, gpu.dma_start.h); + gpu.dma_start.w, gpu.dma_start.h, 0); } static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd) @@ -740,7 +739,7 @@ long GPUfreeze(uint32_t type, struct GPUFreeze *freeze) GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1)); } renderer_sync_ecmds(gpu.ex_regs); - renderer_update_caches(0, 0, 1024, 512); + renderer_update_caches(0, 0, 1024, 512, 1); break; } @@ -775,6 +774,9 @@ void GPUupdateLace(void) } vout_update(); + if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active) + renderer_update_caches(0, 0, 1024, 512, 1); + gpu.state.enhancement_was_active = gpu.state.enhancement_active; gpu.state.fb_dirty = 0; gpu.state.blanked = 0; } diff --git a/plugins/gpulib/gpu.h b/plugins/gpulib/gpu.h index 446a0234..b6bd60af 100644 --- a/plugins/gpulib/gpu.h +++ b/plugins/gpulib/gpu.h @@ -13,6 +13,12 @@ #include +#define gpu_log(fmt, ...) \ + printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__) + +//#define log_anomaly gpu_log +#define log_anomaly(...) + #ifdef __cplusplus extern "C" { #endif @@ -69,6 +75,7 @@ struct psx_gpu { uint32_t blanked:1; uint32_t enhancement_enable:1; uint32_t enhancement_active:1; + uint32_t enhancement_was_active:1; uint32_t dims_changed:1; uint32_t *frame_count; uint32_t *hcnt; /* hsync count */ @@ -111,11 +118,12 @@ struct rearmed_cbs; int renderer_init(void); void renderer_finish(void); void renderer_sync_ecmds(uint32_t * ecmds); -void renderer_update_caches(int x, int y, int w, int h); +void renderer_update_caches(int x, int y, int w, int h, int state_changed); void renderer_flush_queues(void); void renderer_set_interlace(int enable, int is_odd); void renderer_set_config(const struct rearmed_cbs *config); void renderer_notify_res_change(void); +void renderer_notify_scanout_x_change(int x, int w); int vout_init(void); int vout_finish(void); diff --git a/plugins/gpulib/vout_pl.c b/plugins/gpulib/vout_pl.c index 26827d0e..eadf57ce 100644 --- a/plugins/gpulib/vout_pl.c +++ b/plugins/gpulib/vout_pl.c @@ -75,8 +75,11 @@ void vout_update(void) check_mode_change(0); if (gpu.state.enhancement_active) { + if (!gpu.state.enhancement_was_active) + return; // buffer not ready yet vram = gpu.get_enhancement_bufer(&src_x, &src_y, &w, &h, &vram_h); x *= 2; y *= 2; + src_x2 *= 2; } if (src_y + h > vram_h) { -- 2.39.2