From: notaz Date: Sat, 18 Aug 2012 21:37:50 +0000 (+0300) Subject: psx_gpu: start handling vram loads/moves for enhancement X-Git-Tag: r16~15^2~16 X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=commitdiff_plain;h=50f9355a2338111d940ed408f52fe1defe4df23e psx_gpu: start handling vram loads/moves for enhancement --- diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S index 3239412b..87a14f64 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S @@ -5410,3 +5410,38 @@ function(update_texture_8bpp_cache_slice) vpop { q0 - q3 } ldmia sp!, { r4 - r11, pc } + +/* void scale2x_tiles8(void *dst, const void *src, int w8, int h) */ +function(scale2x_tiles8) + push { r4, r14 } + + mov r4, r1 + add r12, r0, #1024*2 + mov r14, r2 + +0: + vld1.u16 { q0 }, [ r1, :128 ]! + vld1.u16 { q2 }, [ r1, :128 ]! + vmov q1, q0 + vmov q3, q2 + vzip.16 q0, q1 + vzip.16 q2, q3 + subs r14, #2 + vst1.u16 { q0, q1 }, [ r0, :128 ]! + vst1.u16 { q0, q1 }, [ r12, :128 ]! + blt 1f + vst1.u16 { q2, q3 }, [ r0, :128 ]! + vst1.u16 { q2, q3 }, [ r12, :128 ]! + bgt 0b +1: + subs r3, #1 + mov r14, r2 + add r0, #1024*2*2 + add r4, #1024*2 + sub r0, r2, lsl #4+1 + mov r1, r4 + add r12, r0, #1024*2 + bgt 0b + nop + + pop { r4, pc } diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index 86a816ec..af82d7e5 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -798,6 +798,8 @@ breakloop: #define shift_triangle_area() \ psx_gpu->triangle_area *= 4 +extern void scale2x_tiles8(void *dst, const void *src, int w8, int h); + static int disable_main_render; static void do_triangle_enhanced(psx_gpu_struct *psx_gpu, @@ -1292,37 +1294,28 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_c } case 0x80: // vid -> vid - render_block_move(psx_gpu, list_s16[2] & 0x3FF, list_s16[3] & 0x1FF, - list_s16[4] & 0x3FF, list_s16[5] & 0x1FF, - ((list_s16[6] - 1) & 0x3FF) + 1, ((list_s16[7] - 1) & 0x1FF) + 1); + { + u32 sx = list_s16[2] & 0x3FF; + u32 sy = list_s16[3] & 0x1FF; + u32 dx = list_s16[4] & 0x3FF; + u32 dy = list_s16[5] & 0x1FF; + u32 w = ((list_s16[6] - 1) & 0x3FF) + 1; + u32 h = ((list_s16[7] - 1) & 0x1FF) + 1; + + render_block_move(psx_gpu, sx, sy, dx, dy, w, h); + if (dy + h > 512) + h = 512 - dy; + sx = sx & ~7; // FIXME? + dx = dx * 2 & ~7; + dy *= 2; + scale2x_tiles8(psx_gpu->enhancement_buf_ptr + dy * 1024 + dx, + psx_gpu->vram_ptr + sy * 1024 + sx, w / 8, h); break; + } -#ifdef PCSX case 0xA0: // sys -> vid case 0xC0: // vid -> sys goto breakloop; -#else - case 0xA0: // sys -> vid - { - u32 load_x = list_s16[2] & 0x3FF; - u32 load_y = list_s16[3] & 0x1FF; - u32 load_width = list_s16[4] & 0x3FF; - u32 load_height = list_s16[5] & 0x1FF; - u32 load_size = load_width * load_height; - - command_length += load_size / 2; - - if(load_size & 1) - command_length++; - - render_block_copy(psx_gpu, (u16 *)&(list_s16[6]), load_x, load_y, - load_width, load_height, load_width); - break; - } - - case 0xC0: // vid -> sys - break; -#endif case 0xE1: set_texture(psx_gpu, list[0]); diff --git a/plugins/gpu_neon/psx_gpu_if.c b/plugins/gpu_neon/psx_gpu_if.c index 250aa8c7..ca76fe24 100644 --- a/plugins/gpu_neon/psx_gpu_if.c +++ b/plugins/gpu_neon/psx_gpu_if.c @@ -9,6 +9,7 @@ */ #include +#include extern const unsigned char cmd_lengths[256]; #define command_lengths cmd_lengths @@ -39,6 +40,8 @@ int do_cmd_list(uint32_t *list, int count, int *last_cmd) return ret; } +#define ENHANCEMENT_BUF_SIZE (1024 * 1024 * 2 * 4 + 4096) + int renderer_init(void) { initialize_psx_gpu(&egpu, gpu.vram); @@ -48,9 +51,12 @@ int renderer_init(void) // currently we use 4x 1024*1024 buffers instead of single 2048*1024 // to be able to reuse 1024-width code better (triangle setup, // dithering phase, lines). - gpu.enhancement_bufer = malloc(1024 * 1024 * 2 * 4); - if (gpu.enhancement_bufer == NULL) + gpu.enhancement_bufer = mmap(NULL, ENHANCEMENT_BUF_SIZE, + PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (gpu.enhancement_bufer == MAP_FAILED) { printf("OOM for enhancement buffer\n"); + gpu.enhancement_bufer = NULL; + } } egpu.enhancement_buf_ptr = gpu.enhancement_bufer; @@ -59,11 +65,44 @@ int renderer_init(void) void renderer_finish(void) { - free(gpu.enhancement_bufer); + if (gpu.enhancement_bufer != NULL) + munmap(gpu.enhancement_bufer, ENHANCEMENT_BUF_SIZE); gpu.enhancement_bufer = NULL; egpu.enhancement_buf_ptr = NULL; } +static __attribute__((noinline)) void +sync_enhancement_buffers(int x, int y, int w, int h) +{ + int xt = egpu.enhancement_x_threshold; + u16 *src, *dst; + int wb, i; + + w += x & 7; + x &= ~7; + w = (w + 7) & ~7; + if (y + h > 512) + h = 512 - y; + + for (i = 0; i < 4 && w > 0; i++) { + if (x < 512) { + wb = w; + if (x + w > 512) + wb = 512 - x; + src = gpu.vram + xt * i + y * 1024 + x; + dst = egpu.enhancement_buf_ptr + + (1024*1024 + xt * 2) * i + (y * 1024 + x) * 2; + scale2x_tiles8(dst, src, wb / 8, h); + } + + x -= xt; + if (x < 0) { + w += x; + x = 0; + } + } +} + void renderer_sync_ecmds(uint32_t *ecmds) { gpu_parse(&egpu, ecmds + 1, 6 * 4, NULL); @@ -72,6 +111,8 @@ void renderer_sync_ecmds(uint32_t *ecmds) void renderer_update_caches(int x, int y, int w, int h) { update_texture_cache_region(&egpu, x, y, x + w - 1, y + h - 1); + if (gpu.state.enhancement_active && !gpu.status.rgb24) + sync_enhancement_buffers(x, y, w, h); } void renderer_flush_queues(void) @@ -90,6 +131,7 @@ void renderer_set_interlace(int enable, int is_odd) void renderer_notify_res_change(void) { + // note: must keep it multiple of 8 egpu.enhancement_x_threshold = gpu.screen.hres; } @@ -97,5 +139,13 @@ void renderer_notify_res_change(void) void renderer_set_config(const struct rearmed_cbs *cbs) { + static int enhancement_was_on; + disable_main_render = cbs->gpu_neon.enhancement_no_main; + if (egpu.enhancement_buf_ptr != NULL && cbs->gpu_neon.enhancement_enable + && !enhancement_was_on) + { + sync_enhancement_buffers(0, 0, 1024, 512); + } + enhancement_was_on = cbs->gpu_neon.enhancement_enable; } diff --git a/plugins/gpulib/vout_pl.c b/plugins/gpulib/vout_pl.c index cbd80349..5c74914a 100644 --- a/plugins/gpulib/vout_pl.c +++ b/plugins/gpulib/vout_pl.c @@ -77,8 +77,8 @@ static void blit(void) (x + 8) / stride * 1024 * 1024; x *= 2; y *= 2; - w *= 2; - h *= 2; + w = (w - 2) * 2; + h = (h * 2) - 1; stride *= 2; vram_mask = 1024 * 1024 - 1; }