From 2da2fc7676c1fc40d26226a7a4c43728d9a2eedf Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 23 Oct 2023 21:00:35 +0300 Subject: [PATCH] gpu_neon: rework buffering to reduce flickering ... maybe notaz/pcsx_rearmed#324 --- frontend/plat_sdl.c | 7 + frontend/plugin_lib.c | 18 +- frontend/plugin_lib.h | 1 + plugins/dfxvideo/gpulib_if.c | 2 +- plugins/gpu-gles/gpulib_if.c | 2 +- plugins/gpu_neon/psx_gpu/psx_gpu.c | 5 + plugins/gpu_neon/psx_gpu/psx_gpu.h | 16 +- plugins/gpu_neon/psx_gpu/psx_gpu_4x.c | 22 +- plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h | 31 +-- .../gpu_neon/psx_gpu/psx_gpu_offsets_update.c | 46 ++-- plugins/gpu_neon/psx_gpu/psx_gpu_parse.c | 228 ++++++++++-------- plugins/gpu_neon/psx_gpu_if.c | 112 +++------ plugins/gpu_senquack/gpulib_if.cpp | 2 +- plugins/gpu_unai/gpulib_if.cpp | 2 +- plugins/gpulib/gpu.c | 22 +- plugins/gpulib/gpu.h | 2 +- plugins/gpulib/vout_pl.c | 2 + 17 files changed, 252 insertions(+), 268 deletions(-) diff --git a/frontend/plat_sdl.c b/frontend/plat_sdl.c index 5f29b90c..c5570253 100644 --- a/frontend/plat_sdl.c +++ b/frontend/plat_sdl.c @@ -328,6 +328,13 @@ void plat_video_menu_end(void) void plat_video_menu_leave(void) { + void *fb = NULL; + if (plat_sdl_overlay != NULL || plat_sdl_gl_active) + fb = shadow_fb; + else if (plat_sdl_screen) + fb = plat_sdl_screen->pixels; + if (fb) + memset(fb, 0, g_menuscreen_w * g_menuscreen_h * 2); in_menu = 0; } diff --git a/frontend/plugin_lib.c b/frontend/plugin_lib.c index 2339028e..50aba227 100644 --- a/frontend/plugin_lib.c +++ b/frontend/plugin_lib.c @@ -134,7 +134,7 @@ static __attribute__((noinline)) void draw_active_chans(int vout_w, int vout_h) static const unsigned short colors[2] = { 0x1fe3, 0x0700 }; unsigned short *dest = (unsigned short *)pl_vout_buf + - vout_w * (vout_h - HUD_HEIGHT) + vout_w / 2 - 192/2; + pl_vout_w * (vout_h - HUD_HEIGHT) + pl_vout_w / 2 - 192/2; unsigned short *d, p; int c, x, y; @@ -149,7 +149,7 @@ static __attribute__((noinline)) void draw_active_chans(int vout_w, int vout_h) (fmod_chans & (1< 0) { + if (flip_clear_counter > 0) { if (pl_plat_clear) pl_plat_clear(); else memset(pl_vout_buf, 0, dstride * h_full * pl_vout_bpp / 8); - clear_counter--; + flip_clear_counter--; } if (pl_plat_blit) diff --git a/frontend/plugin_lib.h b/frontend/plugin_lib.h index efd7d1e2..97d44f25 100644 --- a/frontend/plugin_lib.h +++ b/frontend/plugin_lib.h @@ -45,6 +45,7 @@ void pl_start_watchdog(void); void *pl_prepare_screenshot(int *w, int *h, int *bpp); void pl_init(void); void pl_switch_dispmode(void); +void pl_force_clear(void); void pl_timing_prepare(int is_pal); void pl_frame_limit(void); diff --git a/plugins/dfxvideo/gpulib_if.c b/plugins/dfxvideo/gpulib_if.c index 978e7d84..d08ca67e 100644 --- a/plugins/dfxvideo/gpulib_if.c +++ b/plugins/dfxvideo/gpulib_if.c @@ -299,7 +299,7 @@ void renderer_notify_res_change(void) { } -void renderer_notify_scanout_x_change(int x, int w) +void renderer_notify_scanout_change(int x, int y) { } diff --git a/plugins/gpu-gles/gpulib_if.c b/plugins/gpu-gles/gpulib_if.c index 923f652e..a3a0c43b 100644 --- a/plugins/gpu-gles/gpulib_if.c +++ b/plugins/gpu-gles/gpulib_if.c @@ -514,7 +514,7 @@ void renderer_notify_res_change(void) { } -void renderer_notify_scanout_x_change(int x, int w) +void renderer_notify_scanout_change(int x, int y) { } diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index ea3641f8..62080f3f 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -23,6 +24,7 @@ #include "vector_ops.h" #endif #include "psx_gpu_simd.h" +#include "psx_gpu_offsets.h" #if 0 void dump_r_d(const char *name, void *dump); @@ -5012,6 +5014,9 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram) psx_gpu->primitive_type = PRIMITIVE_TYPE_UNKNOWN; psx_gpu->saved_hres = 256; + + // check some offset + psx_gpu->reserved_a[(offsetof(psx_gpu_struct, blocks) == psx_gpu_blocks_offset) - 1] = 0; } u64 get_us(void) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.h b/plugins/gpu_neon/psx_gpu/psx_gpu.h index 4eb622df..da9e3426 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.h +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.h @@ -141,8 +141,6 @@ typedef struct u32 triangle_color; u32 dither_table[4]; - u32 uvrgb_phase; - struct render_block_handler_struct *render_block_handler; void *texture_page_ptr; void *texture_page_base; @@ -150,6 +148,8 @@ typedef struct u16 *vram_ptr; u16 *vram_out_ptr; + u32 uvrgb_phase; + u16 render_state_base; u16 render_state; @@ -194,15 +194,15 @@ typedef struct s16 saved_viewport_start_y; s16 saved_viewport_end_x; s16 saved_viewport_end_y; - u8 enhancement_buf_by_x16[64]; // 0-3 specifying which buf - u16 enhancement_buf_start[4]; // x pos where buf[n] begins - - u16 enhancement_scanout_x[4]; - u16 enhancement_scanout_select; + struct psx_gpu_scanout { + u16 x, y, w, h; + } enhancement_scanouts[4]; // 0-3 specifying which buf to use + u16 enhancement_scanout_eselect; // eviction selector + u16 enhancement_current_buf; // Align up to 64 byte boundary to keep the upcoming buffers cache line // aligned, also make reachable with single immediate addition - u8 reserved_a[142]; + u8 reserved_a[188 + 9*4 - 9*sizeof(void *)]; // 8KB block_struct blocks[MAX_BLOCKS_PER_ROW]; diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c b/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c index bd6c7a1f..7b3ee85a 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c @@ -1,11 +1,3 @@ -#define select_enhancement_buf_index(psx_gpu, x) \ - ((psx_gpu)->enhancement_buf_by_x16[(u32)(x) / \ - (1024u / sizeof((psx_gpu)->enhancement_buf_by_x16))]) - -#define select_enhancement_buf_ptr(psx_gpu, x) \ - ((psx_gpu)->enhancement_buf_ptr + \ - (select_enhancement_buf_index(psx_gpu, x) << 20)) - #if !defined(NEON_BUILD) || defined(SIMD_BUILD) #ifndef zip_4x32b @@ -325,12 +317,12 @@ render_block_handler_struct render_sprite_block_handlers_4x[] = render_sprite_blocks_switch_block_4x() }; - void render_sprite_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, s32 width, s32 height, u32 flags, u32 color) { s32 x_right = x + width - 1; s32 y_bottom = y + height - 1; + s16 end_x; #ifdef PROFILE sprites++; @@ -352,8 +344,12 @@ void render_sprite_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, height -= clip; } - if(x_right > psx_gpu->viewport_end_x) - width -= x_right - psx_gpu->viewport_end_x; + end_x = psx_gpu->viewport_end_x; + if (end_x - psx_gpu->viewport_start_x + 1 > 512) + end_x = psx_gpu->viewport_start_x + 511; + + if(x_right > end_x) + width -= x_right - end_x; if(y_bottom > psx_gpu->viewport_end_y) height -= y_bottom - psx_gpu->viewport_end_y; @@ -361,7 +357,9 @@ void render_sprite_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, if((width <= 0) || (height <= 0)) return; - psx_gpu->vram_out_ptr = select_enhancement_buf_ptr(psx_gpu, x); + if (!psx_gpu->enhancement_current_buf_ptr) + return; + psx_gpu->vram_out_ptr = psx_gpu->enhancement_current_buf_ptr; x *= 2; y *= 2; diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h index 161384e9..2f8a6463 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h @@ -1,7 +1,6 @@ #ifndef __P_PSX_GPU_OFFSETS_H__ #define __P_PSX_GPU_OFFSETS_H__ -#define psx_gpu_test_mask_offset 0x0 #define psx_gpu_uvrg_offset 0x10 #define psx_gpu_uvrg_dx_offset 0x20 #define psx_gpu_uvrg_dy_offset 0x30 @@ -13,23 +12,18 @@ #define psx_gpu_b_offset 0x90 #define psx_gpu_b_dy_offset 0x94 #define psx_gpu_triangle_area_offset 0x98 -#define psx_gpu_texture_window_settings_offset 0x9c #define psx_gpu_current_texture_mask_offset 0xa0 -#define psx_gpu_viewport_mask_offset 0xa4 #define psx_gpu_dirty_textures_4bpp_mask_offset 0xa8 #define psx_gpu_dirty_textures_8bpp_mask_offset 0xac #define psx_gpu_dirty_textures_8bpp_alternate_mask_offset 0xb0 #define psx_gpu_triangle_color_offset 0xb4 #define psx_gpu_dither_table_offset 0xb8 -#define psx_gpu_uvrgb_phase_offset 0xc8 -#define psx_gpu_render_block_handler_offset 0xcc -#define psx_gpu_texture_page_ptr_offset 0xd0 -#define psx_gpu_texture_page_base_offset 0xd4 -#define psx_gpu_clut_ptr_offset 0xd8 -#define psx_gpu_vram_ptr_offset 0xdc -#define psx_gpu_vram_out_ptr_offset 0xe0 -#define psx_gpu_render_state_base_offset 0xe4 -#define psx_gpu_render_state_offset 0xe6 +#define psx_gpu_texture_page_ptr_offset 0xcc +#define psx_gpu_texture_page_base_offset 0xd0 +#define psx_gpu_clut_ptr_offset 0xd4 +#define psx_gpu_vram_ptr_offset 0xd8 +#define psx_gpu_vram_out_ptr_offset 0xdc +#define psx_gpu_uvrgb_phase_offset 0xe0 #define psx_gpu_num_spans_offset 0xe8 #define psx_gpu_num_blocks_offset 0xea #define psx_gpu_viewport_start_x_offset 0xec @@ -38,26 +32,13 @@ #define psx_gpu_viewport_end_y_offset 0xf2 #define psx_gpu_mask_msb_offset 0xf4 #define psx_gpu_triangle_winding_offset 0xf6 -#define psx_gpu_display_area_draw_enable_offset 0xf7 #define psx_gpu_current_texture_page_offset 0xf8 -#define psx_gpu_last_8bpp_texture_page_offset 0xf9 #define psx_gpu_texture_mask_width_offset 0xfa #define psx_gpu_texture_mask_height_offset 0xfb -#define psx_gpu_texture_window_x_offset 0xfc -#define psx_gpu_texture_window_y_offset 0xfd -#define psx_gpu_primitive_type_offset 0xfe -#define psx_gpu_render_mode_offset 0xff -#define psx_gpu_offset_x_offset 0x100 -#define psx_gpu_offset_y_offset 0x102 -#define psx_gpu_clut_settings_offset 0x104 -#define psx_gpu_texture_settings_offset 0x106 #define psx_gpu_reciprocal_table_ptr_offset 0x108 #define psx_gpu_blocks_offset 0x200 #define psx_gpu_span_uvrg_offset_offset 0x2200 #define psx_gpu_span_edge_data_offset 0x4200 #define psx_gpu_span_b_offset_offset 0x5200 -#define psx_gpu_texture_4bpp_cache_offset 0x5a00 -#define psx_gpu_texture_8bpp_even_cache_offset 0x205a00 -#define psx_gpu_texture_8bpp_odd_cache_offset 0x305a00 #endif /* __P_PSX_GPU_OFFSETS_H__ */ diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c index b1de121e..9b378482 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c @@ -4,7 +4,7 @@ #include "common.h" #define WRITE_OFFSET(f, member) \ - fprintf(f, "#define %-50s0x%x\n", \ + fprintf(f, "#define %-50s0x%zx\n", \ "psx_gpu_" #member "_offset", \ offsetof(psx_gpu_struct, member)); @@ -22,8 +22,10 @@ int main() perror("fopen"); return 1; } + fputs("#ifndef __P_PSX_GPU_OFFSETS_H__\n", f); + fputs("#define __P_PSX_GPU_OFFSETS_H__\n\n", f); - WRITE_OFFSET(f, test_mask); + //WRITE_OFFSET(f, test_mask); WRITE_OFFSET(f, uvrg); WRITE_OFFSET(f, uvrg_dx); WRITE_OFFSET(f, uvrg_dy); @@ -35,23 +37,23 @@ int main() WRITE_OFFSET(f, b); WRITE_OFFSET(f, b_dy); WRITE_OFFSET(f, triangle_area); - WRITE_OFFSET(f, texture_window_settings); + //WRITE_OFFSET(f, texture_window_settings); WRITE_OFFSET(f, current_texture_mask); - WRITE_OFFSET(f, viewport_mask); + //WRITE_OFFSET(f, viewport_mask); WRITE_OFFSET(f, dirty_textures_4bpp_mask); WRITE_OFFSET(f, dirty_textures_8bpp_mask); WRITE_OFFSET(f, dirty_textures_8bpp_alternate_mask); WRITE_OFFSET(f, triangle_color); WRITE_OFFSET(f, dither_table); - WRITE_OFFSET(f, uvrgb_phase); - WRITE_OFFSET(f, render_block_handler); + //WRITE_OFFSET(f, render_block_handler); WRITE_OFFSET(f, texture_page_ptr); WRITE_OFFSET(f, texture_page_base); WRITE_OFFSET(f, clut_ptr); WRITE_OFFSET(f, vram_ptr); WRITE_OFFSET(f, vram_out_ptr); - WRITE_OFFSET(f, render_state_base); - WRITE_OFFSET(f, render_state); + WRITE_OFFSET(f, uvrgb_phase); + //WRITE_OFFSET(f, render_state_base); + //WRITE_OFFSET(f, render_state); WRITE_OFFSET(f, num_spans); WRITE_OFFSET(f, num_blocks); WRITE_OFFSET(f, viewport_start_x); @@ -60,27 +62,29 @@ int main() WRITE_OFFSET(f, viewport_end_y); WRITE_OFFSET(f, mask_msb); WRITE_OFFSET(f, triangle_winding); - WRITE_OFFSET(f, display_area_draw_enable); + //WRITE_OFFSET(f, display_area_draw_enable); WRITE_OFFSET(f, current_texture_page); - WRITE_OFFSET(f, last_8bpp_texture_page); + //WRITE_OFFSET(f, last_8bpp_texture_page); WRITE_OFFSET(f, texture_mask_width); WRITE_OFFSET(f, texture_mask_height); - WRITE_OFFSET(f, texture_window_x); - WRITE_OFFSET(f, texture_window_y); - WRITE_OFFSET(f, primitive_type); - WRITE_OFFSET(f, render_mode); - WRITE_OFFSET(f, offset_x); - WRITE_OFFSET(f, offset_y); - WRITE_OFFSET(f, clut_settings); - WRITE_OFFSET(f, texture_settings); + //WRITE_OFFSET(f, texture_window_x); + //WRITE_OFFSET(f, texture_window_y); + //WRITE_OFFSET(f, primitive_type); + //WRITE_OFFSET(f, render_mode); + //WRITE_OFFSET(f, offset_x); + //WRITE_OFFSET(f, offset_y); + //WRITE_OFFSET(f, clut_settings); + //WRITE_OFFSET(f, texture_settings); WRITE_OFFSET(f, reciprocal_table_ptr); WRITE_OFFSET(f, blocks); WRITE_OFFSET(f, span_uvrg_offset); WRITE_OFFSET(f, span_edge_data); WRITE_OFFSET(f, span_b_offset); - WRITE_OFFSET(f, texture_4bpp_cache); - WRITE_OFFSET(f, texture_8bpp_even_cache); - WRITE_OFFSET(f, texture_8bpp_odd_cache); + //WRITE_OFFSET(f, texture_4bpp_cache); + //WRITE_OFFSET(f, texture_8bpp_even_cache); + //WRITE_OFFSET(f, texture_8bpp_odd_cache); + + fputs("\n#endif /* __P_PSX_GPU_OFFSETS_H__ */\n", f); fclose(f); return 0; diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index 5badf6b9..de227d5b 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -788,78 +788,111 @@ breakloop: #ifdef PCSX -#define ENH_BUF_TABLE_STEP (1024 / sizeof(psx_gpu->enhancement_buf_by_x16)) - -static int is_new_scanout(psx_gpu_struct *psx_gpu, int x) +// this thing has become such a PITA, should just handle the 2048 width really +static void update_enhancement_buf_scanouts(psx_gpu_struct *psx_gpu, + int x, int y, int w, int h) { - int i, scanout_x; - for (i = 0; i < ARRAY_SIZE(psx_gpu->enhancement_scanout_x); i++) - { - scanout_x = psx_gpu->enhancement_scanout_x[i]; - if (x <= scanout_x && scanout_x < x + ENH_BUF_TABLE_STEP) - { - if (x != scanout_x) - log_anomaly("unaligned scanout x: %d,%d\n", scanout_x, x); - return 1; - } + int max_bufs = ARRAY_SIZE(psx_gpu->enhancement_scanouts); + struct psx_gpu_scanout *s; + int i, sel, right, bottom; + u32 tol_x = 48, tol_y = 16; + u32 intersection; + + //w = (w + 15) & ~15; + psx_gpu->saved_hres = w; + assert(!(max_bufs & (max_bufs - 1))); + for (i = 0; i < max_bufs; i++) { + s = &psx_gpu->enhancement_scanouts[i]; + if (s->x == x && s->y == y && w - s->w <= tol_x && h - s->h <= tol_y) + return; } - return 0; -} - -static void update_enhancement_buf_table_from_hres(psx_gpu_struct *psx_gpu) -{ - u32 b, x; - b = 0; - psx_gpu->enhancement_buf_by_x16[0] = b; - psx_gpu->enhancement_buf_start[0] = 0; - for (x = 1; x < sizeof(psx_gpu->enhancement_buf_by_x16); x++) - { - if (b < 3 && is_new_scanout(psx_gpu, x * ENH_BUF_TABLE_STEP)) { - b++; - psx_gpu->enhancement_buf_start[b] = x * ENH_BUF_TABLE_STEP; + // evict any scanout that intersects + right = x + w; + bottom = y + h; + for (i = 0, sel = -1; i < max_bufs; i++) { + s = &psx_gpu->enhancement_scanouts[i]; + if (s->x >= right) continue; + if (s->x + s->w <= x) continue; + if (s->y >= bottom) continue; + if (s->y + s->h <= y) continue; + // ... but allow upto 16 pixels intersection that some games do + if ((intersection = s->x + s->w - x) - 1u <= tol_x) { + s->w -= intersection; + continue; } - - psx_gpu->enhancement_buf_by_x16[x] = b; + if ((intersection = s->y + s->h - y) - 1u <= tol_y) { + s->h -= intersection; + continue; + } + //printf("%4d%4d%4dx%d evicted\n", s->x, s->y, s->w, s->h); + s->w = 0; + sel = i; + break; } -#if 0 - printf("buf_by_x16:\n"); - for (b = 0; b < 3; b++) { - int first = -1, count = 0; - for (x = 0; x < sizeof(psx_gpu->enhancement_buf_by_x16); x++) { - if (psx_gpu->enhancement_buf_by_x16[x] == b) { - if (first < 0) first = x; - count++; + if (sel >= 0) { + // 2nd intersection check + for (i = 0; i < max_bufs; i++) { + s = &psx_gpu->enhancement_scanouts[i]; + if (!s->w) + continue; + if ((intersection = right - s->x) - 1u <= tol_x) { + w -= intersection; + break; + } + if ((intersection = bottom - s->y) - 1u <= tol_y) { + h -= intersection; + break; } - } - if (count) { - assert(first * ENH_BUF_TABLE_STEP == psx_gpu->enhancement_buf_start[b]); - printf("%d: %3zd-%zd\n", b, first * ENH_BUF_TABLE_STEP, - (first + count) * ENH_BUF_TABLE_STEP); } } + else + sel = psx_gpu->enhancement_scanout_eselect++; + psx_gpu->enhancement_scanout_eselect &= max_bufs - 1; + s = &psx_gpu->enhancement_scanouts[sel]; + s->x = x; + s->y = y; + s->w = w; + s->h = h; + + sync_enhancement_buffers(x, y, w, h); +#if 0 + printf("scanouts:\n"); + for (i = 0; i < ARRAY_SIZE(psx_gpu->enhancement_scanouts); i++) { + s = &psx_gpu->enhancement_scanouts[i]; + if (s->w) + printf("%4d%4d%4dx%d\n", s->x, s->y, s->w, s->h); + } #endif } -static void update_enhancement_buf_table_from_x(psx_gpu_struct *psx_gpu, - u32 x0, u32 len) +static int select_enhancement_buf_index(psx_gpu_struct *psx_gpu, s32 x, s32 y) { -#if 0 - u32 x, b; + int i; + for (i = 0; i < ARRAY_SIZE(psx_gpu->enhancement_scanouts); i++) { + const struct psx_gpu_scanout *s = &psx_gpu->enhancement_scanouts[i]; + if (s->x <= x && x < s->x + s->w && + s->y <= y && y < s->y + s->h) + return i; + } + return -1; +} - for (x = x0, b = 0; x >= len; b++) - x -= len; - if (b > 3) - b = 3; +#define select_enhancement_buf_by_index(psx_gpu_, i_) \ + ((psx_gpu_)->enhancement_buf_ptr + ((i_) << 20)) - memset(psx_gpu->enhancement_buf_by_x16 + x0 / ENH_BUF_TABLE_STEP, - b, (len + ENH_BUF_TABLE_STEP - 1) / ENH_BUF_TABLE_STEP); -#endif +static void *select_enhancement_buf_ptr(psx_gpu_struct *psx_gpu, s32 x, s32 y) +{ + int i = select_enhancement_buf_index(psx_gpu, x, y); + return i >= 0 ? select_enhancement_buf_by_index(psx_gpu, i) : NULL; } -#define select_enhancement_buf(psx_gpu) \ - psx_gpu->enhancement_current_buf_ptr = \ - select_enhancement_buf_ptr(psx_gpu, psx_gpu->saved_viewport_start_x) +static void select_enhancement_buf(psx_gpu_struct *psx_gpu) +{ + s32 x = psx_gpu->saved_viewport_start_x; + s32 y = psx_gpu->saved_viewport_start_y; + psx_gpu->enhancement_current_buf_ptr = select_enhancement_buf_ptr(psx_gpu, x, y); +} #define enhancement_disable() { \ psx_gpu->vram_out_ptr = psx_gpu->vram_ptr; \ @@ -870,13 +903,19 @@ static void update_enhancement_buf_table_from_x(psx_gpu_struct *psx_gpu, psx_gpu->uvrgb_phase = 0x8000; \ } -#define enhancement_enable() { \ - psx_gpu->vram_out_ptr = psx_gpu->enhancement_current_buf_ptr; \ - psx_gpu->viewport_start_x = psx_gpu->saved_viewport_start_x * 2; \ - psx_gpu->viewport_start_y = psx_gpu->saved_viewport_start_y * 2; \ - psx_gpu->viewport_end_x = psx_gpu->saved_viewport_end_x * 2 + 1; \ - psx_gpu->viewport_end_y = psx_gpu->saved_viewport_end_y * 2 + 1; \ - psx_gpu->uvrgb_phase = 0x7fff; \ +static int enhancement_enable(psx_gpu_struct *psx_gpu) +{ + if (!psx_gpu->enhancement_current_buf_ptr) + return 0; + psx_gpu->vram_out_ptr = psx_gpu->enhancement_current_buf_ptr; + psx_gpu->viewport_start_x = psx_gpu->saved_viewport_start_x * 2; + psx_gpu->viewport_start_y = psx_gpu->saved_viewport_start_y * 2; + psx_gpu->viewport_end_x = psx_gpu->saved_viewport_end_x * 2 + 1; + psx_gpu->viewport_end_y = psx_gpu->saved_viewport_end_y * 2 + 1; + if (psx_gpu->viewport_end_x - psx_gpu->viewport_start_x + 1 > 1024) + psx_gpu->viewport_end_x = psx_gpu->viewport_start_x + 1023; + psx_gpu->uvrgb_phase = 0x7fff; + return 1; } #define shift_vertices3(v) { \ @@ -971,17 +1010,10 @@ void scale2x_tiles8(void *dst, const void *src, int w8, int h) static int disable_main_render; -static int check_enhanced_range(psx_gpu_struct *psx_gpu, int x, int x_end) +// simple check for a case where no clipping is used +// - now handled by adjusting the viewport +static int check_enhanced_range(psx_gpu_struct *psx_gpu, int x, int y) { - // reject to avoid oveflowing the 1024 width - // (assume some offscreen render-to-texture thing) - int fb_index; - if (x < 0) - return 1; - fb_index = select_enhancement_buf_index(psx_gpu, x); - if (x >= psx_gpu->enhancement_buf_start[fb_index] + 512) - return 0; - return 1; } @@ -1067,7 +1099,9 @@ static void do_triangle_enhanced(psx_gpu_struct *psx_gpu, if (!check_enhanced_range(psx_gpu, vertex_ptrs[0]->x, vertex_ptrs[2]->x)) return; - enhancement_enable(); + if (!enhancement_enable(psx_gpu)) + return; + shift_vertices3(vertex_ptrs); shift_triangle_area(); render_triangle_p(psx_gpu, vertex_ptrs, current_command); @@ -1198,7 +1232,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 width = list_s16[4] & 0x3FF; u32 height = list_s16[5] & 0x1FF; u32 color = list[0] & 0xFFFFFF; - u32 i1, i2; + s32 i1, i2; x &= ~0xF; width = ((width + 0xF) & ~0xF); @@ -1207,16 +1241,14 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, do_fill(psx_gpu, x, y, width, height, color); - i1 = select_enhancement_buf_index(psx_gpu, x); - i2 = select_enhancement_buf_index(psx_gpu, x + width - 1); - if (i1 != i2) { + i1 = select_enhancement_buf_index(psx_gpu, x, y); + i2 = select_enhancement_buf_index(psx_gpu, x + width - 1, y + height - 1); + if (i1 < 0 || i1 != i2) { sync_enhancement_buffers(x, y, width, height); break; } - if (x >= psx_gpu->enhancement_buf_start[i1] + psx_gpu->saved_hres) - break; - psx_gpu->vram_out_ptr = select_enhancement_buf_ptr(psx_gpu, x); + psx_gpu->vram_out_ptr = select_enhancement_buf_by_index(psx_gpu, i1); x *= 2; y *= 2; width *= 2; @@ -1346,8 +1378,8 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, vertexes[1].y = list_s16[5] + psx_gpu->offset_y; render_line(psx_gpu, vertexes, current_command, list[0], 0); - enhancement_enable(); - render_line(psx_gpu, vertexes, current_command, list[0], 1); + if (enhancement_enable(psx_gpu)) + render_line(psx_gpu, vertexes, current_command, list[0], 1); break; } @@ -1370,8 +1402,8 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, enhancement_disable(); render_line(psx_gpu, vertexes, current_command, list[0], 0); - enhancement_enable(); - render_line(psx_gpu, vertexes, current_command, list[0], 1); + if (enhancement_enable(psx_gpu)) + render_line(psx_gpu, vertexes, current_command, list[0], 1); list_position++; num_vertexes++; @@ -1406,8 +1438,8 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, vertexes[1].y = list_s16[7] + psx_gpu->offset_y; render_line(psx_gpu, vertexes, current_command, 0, 0); - enhancement_enable(); - render_line(psx_gpu, vertexes, current_command, 0, 1); + if (enhancement_enable(psx_gpu)) + render_line(psx_gpu, vertexes, current_command, 0, 1); break; } @@ -1439,8 +1471,8 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, enhancement_disable(); render_line(psx_gpu, vertexes, current_command, 0, 0); - enhancement_enable(); - render_line(psx_gpu, vertexes, current_command, 0, 1); + if (enhancement_enable(psx_gpu)) + render_line(psx_gpu, vertexes, current_command, 0, 1); list_position += 2; num_vertexes++; @@ -1632,8 +1664,6 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, { s16 viewport_start_x = list[0] & 0x3FF; s16 viewport_start_y = (list[0] >> 10) & 0x1FF; - u32 w; - s32 d; if(viewport_start_x == psx_gpu->viewport_start_x && viewport_start_y == psx_gpu->viewport_start_y) @@ -1645,13 +1675,6 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, psx_gpu->saved_viewport_start_x = viewport_start_x; psx_gpu->saved_viewport_start_y = viewport_start_y; - w = (u32)psx_gpu->viewport_end_x - (u32)viewport_start_x + 1; - d = psx_gpu->saved_hres - w; - if(-16 <= d && d <= 16) - { - update_enhancement_buf_table_from_x(psx_gpu, - viewport_start_x, w); - } select_enhancement_buf(psx_gpu); #ifdef TEXTURE_CACHE_4BPP @@ -1668,8 +1691,6 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, { s16 viewport_end_x = list[0] & 0x3FF; s16 viewport_end_y = (list[0] >> 10) & 0x1FF; - u32 w; - s32 d; if(viewport_end_x == psx_gpu->viewport_end_x && viewport_end_y == psx_gpu->viewport_end_y) @@ -1682,13 +1703,6 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, psx_gpu->saved_viewport_end_x = viewport_end_x; psx_gpu->saved_viewport_end_y = viewport_end_y; - w = (u32)viewport_end_x - (u32)psx_gpu->viewport_start_x + 1; - d = psx_gpu->saved_hres - w; - if(-16 <= d && d <= 16) - { - update_enhancement_buf_table_from_x(psx_gpu, - psx_gpu->viewport_start_x, w); - } select_enhancement_buf(psx_gpu); #ifdef TEXTURE_CACHE_4BPP diff --git a/plugins/gpu_neon/psx_gpu_if.c b/plugins/gpu_neon/psx_gpu_if.c index 4a8b76fc..04a15eb2 100644 --- a/plugins/gpu_neon/psx_gpu_if.c +++ b/plugins/gpu_neon/psx_gpu_if.c @@ -13,6 +13,12 @@ #include #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#ifndef min +#define min(a, b) ((a) < (b) ? (a) : (b)) +#endif +#ifndef max +#define max(a, b) ((a) > (b) ? (a) : (b)) +#endif extern const unsigned char cmd_lengths[256]; #define command_lengths cmd_lengths @@ -61,7 +67,9 @@ int do_cmd_list(uint32_t *list, int count, int *last_cmd) static void *get_enhancement_bufer(int *x, int *y, int *w, int *h, int *vram_h) { - uint16_t *ret = select_enhancement_buf_ptr(&egpu, *x); + uint16_t *ret = select_enhancement_buf_ptr(&egpu, *x, *y); + if (ret == NULL) + return NULL; *x *= 2; *y *= 2; @@ -115,44 +123,28 @@ void renderer_finish(void) static __attribute__((noinline)) void sync_enhancement_buffers(int x, int y, int w, int h) { - const int step_x = 1024 / sizeof(egpu.enhancement_buf_by_x16); - int hres = egpu.saved_hres; - int x_buf, w1, s, fb_index; - u16 *src, *dst; - - if (egpu.enhancement_buf_ptr == NULL) - return; - - w += x & (step_x - 1); - x &= ~(step_x - 1); - w = (w + step_x - 1) & ~(step_x - 1); - if (y + h > 512) - h = 512 - y; - - // find x_buf which is an offset into this enhancement_buf - fb_index = egpu.enhancement_buf_by_x16[x / step_x]; - x_buf = x - egpu.enhancement_buf_start[fb_index]; - - while (w > 0) { - fb_index = egpu.enhancement_buf_by_x16[x / step_x]; - for (w1 = 0; w > 0 && x_buf < hres; x_buf += step_x, w1++, w -= step_x) - if (fb_index != egpu.enhancement_buf_by_x16[x / step_x + w1]) - break; - // skip further unneeded data, if any - for (s = 0; w > 0; s++, w -= step_x) - if (fb_index != egpu.enhancement_buf_by_x16[x / step_x + w1 + s]) - break; - - if (w1 > 0) { - src = gpu.vram + y * 1024 + x; - dst = select_enhancement_buf_ptr(&egpu, x); - dst += (y * 1024 + x) * 2; - scale2x_tiles8(dst, src, w1 * step_x / 8, h); - } - - x += (w1 + s) * step_x; - x &= 0x3ff; - x_buf = 0; + int i, right = x + w, bottom = y + h; + const u16 *src = gpu.vram; + // use these because the scanout struct may hold reduced w, h + // due to intersection stuff, see the update_enhancement_buf_scanouts() mess + int s_w = max(gpu.screen.hres, gpu.screen.w); + int s_h = gpu.screen.vres; + s_w = min(s_w, 512); + for (i = 0; i < ARRAY_SIZE(egpu.enhancement_scanouts); i++) { + const struct psx_gpu_scanout *s = &egpu.enhancement_scanouts[i]; + u16 *dst = select_enhancement_buf_by_index(&egpu, i); + int x1, x2, y1, y2; + if (s->w == 0) continue; + if (s->x >= right) continue; + if (s->x + s_w <= x) continue; + if (s->y >= bottom) continue; + if (s->y + s_h <= y) continue; + x1 = max(x, s->x); + x2 = min(right, s->x + s_w); + y1 = max(y, s->y); + y2 = min(bottom, s->y + s_h); + scale2x_tiles8(dst + y1 * 1024*2 + x1 * 2, + src + y1 * 1024 + x1, (x2 - x1 + 7) / 8u, y2 - y1); } } @@ -167,8 +159,8 @@ void renderer_update_caches(int x, int y, int w, int h, int state_changed) if (gpu.state.enhancement_active) { if (state_changed) { - egpu.saved_hres = 0; - renderer_notify_res_change(); + memset(egpu.enhancement_scanouts, 0, sizeof(egpu.enhancement_scanouts)); + egpu.enhancement_scanout_eselect = 0; return; } sync_enhancement_buffers(x, y, w, h); @@ -191,45 +183,15 @@ void renderer_set_interlace(int enable, int is_odd) void renderer_notify_res_change(void) { - renderer_notify_scanout_x_change(gpu.screen.src_x, gpu.screen.hres); + renderer_notify_scanout_change(gpu.screen.src_x, gpu.screen.src_y); } -void renderer_notify_scanout_x_change(int x, int w) +void renderer_notify_scanout_change(int x, int y) { - int hres = (w + 15) & ~15; - int max_bufs = ARRAY_SIZE(egpu.enhancement_scanout_x); - int need_update = 0; - int i; - - if (!gpu.state.enhancement_active) + if (!gpu.state.enhancement_active || !egpu.enhancement_buf_ptr) return; - assert(!(max_bufs & (max_bufs - 1))); - if (egpu.saved_hres != hres) { - for (i = 0; i < max_bufs; i++) - egpu.enhancement_scanout_x[i] = x; - need_update = 1; - } - - if (egpu.enhancement_scanout_x[egpu.enhancement_scanout_select] != x) - { - // maybe triple buffering? - for (i = 0; i < max_bufs; i++) - if (egpu.enhancement_scanout_x[i] == x) - break; - if (i == max_bufs) - need_update = 1; - - egpu.enhancement_scanout_x[egpu.enhancement_scanout_select] = x; - } - egpu.enhancement_scanout_select++; - egpu.enhancement_scanout_select &= max_bufs - 1; - if (need_update) - { - egpu.saved_hres = hres; - update_enhancement_buf_table_from_hres(&egpu); - sync_enhancement_buffers(0, 0, 1024, 512); - } + update_enhancement_buf_scanouts(&egpu, x, y, gpu.screen.hres, gpu.screen.vres); } #include "../../frontend/plugin_lib.h" diff --git a/plugins/gpu_senquack/gpulib_if.cpp b/plugins/gpu_senquack/gpulib_if.cpp index 72dcc6d6..e5a51aa5 100644 --- a/plugins/gpu_senquack/gpulib_if.cpp +++ b/plugins/gpu_senquack/gpulib_if.cpp @@ -144,7 +144,7 @@ void renderer_notify_res_change(void) */ } -void renderer_notify_scanout_x_change(int x, int w) +void renderer_notify_scanout_change(int x, int y) { } diff --git a/plugins/gpu_unai/gpulib_if.cpp b/plugins/gpu_unai/gpulib_if.cpp index 1c461421..02f6b922 100644 --- a/plugins/gpu_unai/gpulib_if.cpp +++ b/plugins/gpu_unai/gpulib_if.cpp @@ -163,7 +163,7 @@ void renderer_notify_res_change(void) { } -void renderer_notify_scanout_x_change(int x, int w) +void renderer_notify_scanout_change(int x, int y) { } diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index 15810b82..fdb109ce 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -260,6 +260,7 @@ long GPUshutdown(void) void GPUwriteStatus(uint32_t data) { uint32_t cmd = data >> 24; + int src_x, src_y; if (cmd < ARRAY_SIZE(gpu.regs)) { if (cmd > 1 && cmd != 5 && gpu.regs[cmd] == data) @@ -289,14 +290,17 @@ void GPUwriteStatus(uint32_t data) gpu.status |= PSX_GPU_STATUS_DMA(data & 3); break; case 0x05: - gpu.screen.src_x = data & 0x3ff; - gpu.screen.src_y = (data >> 10) & 0x1ff; - renderer_notify_scanout_x_change(gpu.screen.src_x, gpu.screen.hres); - if (gpu.frameskip.set) { - decide_frameskip_allow(gpu.ex_regs[3]); - if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) { - decide_frameskip(); - gpu.frameskip.last_flip_frame = *gpu.state.frame_count; + src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff; + if (src_x != gpu.screen.src_x || src_y != gpu.screen.src_y) { + gpu.screen.src_x = src_x; + gpu.screen.src_y = src_y; + renderer_notify_scanout_change(src_x, src_y); + if (gpu.frameskip.set) { + decide_frameskip_allow(gpu.ex_regs[3]); + if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) { + decide_frameskip(); + gpu.frameskip.last_flip_frame = *gpu.state.frame_count; + } } } break; @@ -825,7 +829,7 @@ long GPUfreeze(uint32_t type, struct GPUFreeze *freeze) GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1)); } renderer_sync_ecmds(gpu.ex_regs); - renderer_update_caches(0, 0, 1024, 512, 1); + renderer_update_caches(0, 0, 1024, 512, 0); break; } diff --git a/plugins/gpulib/gpu.h b/plugins/gpulib/gpu.h index 4abc36b7..bf3d28a9 100644 --- a/plugins/gpulib/gpu.h +++ b/plugins/gpulib/gpu.h @@ -126,7 +126,7 @@ void renderer_flush_queues(void); void renderer_set_interlace(int enable, int is_odd); void renderer_set_config(const struct rearmed_cbs *config); void renderer_notify_res_change(void); -void renderer_notify_scanout_x_change(int x, int w); +void renderer_notify_scanout_change(int x, int y); int vout_init(void); int vout_finish(void); diff --git a/plugins/gpulib/vout_pl.c b/plugins/gpulib/vout_pl.c index f9ac0f30..958468c9 100644 --- a/plugins/gpulib/vout_pl.c +++ b/plugins/gpulib/vout_pl.c @@ -84,6 +84,8 @@ void vout_update(void) if (!gpu.state.enhancement_was_active) return; // buffer not ready yet vram = gpu.get_enhancement_bufer(&src_x, &src_y, &w, &h, &vram_h); + if (vram == NULL) + return; x *= 2; y *= 2; src_x2 *= 2; } -- 2.39.5