From 87c45ad1e2a265cedb7970cc1b7777591d0050b7 Mon Sep 17 00:00:00 2001 From: Exophase Date: Sun, 1 Jan 2012 23:07:31 +0200 Subject: [PATCH] psx_gpu: improve fills --- plugins/gpu_neon/psx_gpu/psx_gpu.c | 2 +- plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S | 45 +++++------------- plugins/gpu_neon/psx_gpu/psx_gpu_main.c | 11 +++-- plugins/gpu_neon/psx_gpu/psx_gpu_parse.c | 52 +++++++++++++++++++-- 4 files changed, 67 insertions(+), 43 deletions(-) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index 75deb301..76bfb157 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -4352,8 +4352,8 @@ void render_block_copy(psx_gpu_struct *psx_gpu, u16 *source, u32 x, u32 y, u32 width, u32 height, u32 pitch) { u16 *vram_ptr = psx_gpu->vram_ptr + x + (y * 1024); - u32 mask_msb = psx_gpu->mask_msb; u32 draw_x, draw_y; + u32 mask_msb = psx_gpu->mask_msb; if((width == 0) || (height == 0)) return; diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S index 0dc7ecee..973a8b36 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S @@ -4363,9 +4363,11 @@ function(warmup) #define num_unaligned r2 #define num_width r6 -#undef colors +#undef colors_a +#undef colors_b -#define colors q0 +#define colors_a q0 +#define colors_b q1 .align 3 @@ -4386,51 +4388,26 @@ function(render_block_fill_body) orr color, color_r, color_g, lsl #5 orr color, color, color_b, lsl #10 - add left_unaligned, x, #0x7 + vdup.u16 colors_a, color - bic left_unaligned, left_unaligned, #0x7 - vdup.u16 colors, color - - sub left_unaligned, left_unaligned, x + vmov colors_b, colors_a mov pitch, #2048 - sub pitch, pitch, width, lsl #1 - sub width, width, left_unaligned - - and right_unaligned, width, #0x7 - bic width, width, #0x7 0: - mov num_width, width, lsr #3 - - movs num_unaligned, left_unaligned - beq 2f + mov num_width, width, lsr #4 1: - strh color, [ vram_ptr ], #2 - - subs num_unaligned, num_unaligned, #1 - bne 1b + vst1.u32 { colors_a, colors_b }, [ vram_ptr, :128 ]! - 2: - vst1.u32 { colors }, [ vram_ptr, :128 ]! subs num_width, num_width, #1 - bne 2b - - movs num_unaligned, right_unaligned - beq 4f - - 3: - strh color, [ vram_ptr ], #2 - - subs num_unaligned, num_unaligned, #1 - bne 3b + bne 1b - 4: add vram_ptr, vram_ptr, pitch subs height, height, #1 bne 0b - + + 1: ldmia sp!, { r4 - r6, pc } diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_main.c b/plugins/gpu_neon/psx_gpu/psx_gpu_main.c index f1f79448..6c17b0ab 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_main.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_main.c @@ -174,7 +174,7 @@ int main(int argc, char *argv[]) MAP_SHARED | 0xA0000000, fbdev_handle, 0)); vram_ptr += 64; - initialize_psx_gpu(psx_gpu, vram_ptr + 64); + initialize_psx_gpu(psx_gpu, vram_ptr); #else initialize_psx_gpu(psx_gpu, _vram + 64); #endif @@ -204,14 +204,15 @@ int main(int argc, char *argv[]) gpu_parse(psx_gpu, list, size); flush_render_block_buffer(psx_gpu); - printf("%-64s: ", argv[1]); #ifdef NEON_BUILD u32 cycles_elapsed = get_counter() - cycles; - printf("%d\n", cycles_elapsed); + printf("%-64s: %d\n", argv[1], cycles_elapsed); +#else + printf("%-64s: ", argv[1]); #endif -#if 0 +#if 1 u32 i; for(i = 0; i < 1024 * 512; i++) @@ -238,7 +239,7 @@ int main(int argc, char *argv[]) } #endif -#if 1 +#if 0 printf("\n"); printf(" %d pixels, %d pixel blocks, %d spans\n" " (%lf pixels per block, %lf pixels per span),\n" diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index 7fee2ebe..4f3dd124 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -210,16 +210,61 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size) s16 *list_s16 = (void *)list; current_command = *list >> 24; command_length = command_lengths[current_command]; - + switch(current_command) { case 0x00: break; case 0x02: - render_block_fill(psx_gpu, list[0] & 0xFFFFFF, list_s16[2] & 0x3FF, - list_s16[3] & 0x1FF, list_s16[4] & 0x3FF, list_s16[5] & 0x1FF); + { + u32 x = list_s16[2] & 0x3FF; + u32 y = list_s16[3] & 0x1FF; + u32 width = list_s16[4] & 0x3FF; + u32 height = list_s16[5] & 0x1FF; + u32 color = list[0] & 0xFFFFFF; + + x &= ~0xF; + width = ((width + 0xF) & ~0xF); + + if((x + width) > 1024) + { + u32 width_a = 1024 - x; + u32 width_b = width - width_a; + + if((y + height) > 512) + { + u32 height_a = 512 - y; + u32 height_b = height - height_a; + + render_block_fill(psx_gpu, color, x, y, width_a, height_a); + render_block_fill(psx_gpu, color, 0, y, width_b, height_a); + render_block_fill(psx_gpu, color, x, 0, width_a, height_b); + render_block_fill(psx_gpu, color, 0, 0, width_b, height_b); + } + else + { + render_block_fill(psx_gpu, color, x, y, width_a, height); + render_block_fill(psx_gpu, color, 0, y, width_b, height); + } + } + else + { + if((y + height) > 512) + { + u32 height_a = 512 - y; + u32 height_b = height - height_a; + + render_block_fill(psx_gpu, color, x, y, width, height_a); + render_block_fill(psx_gpu, color, x, 0, width, height_b); + } + else + { + render_block_fill(psx_gpu, color, x, y, width, height); + } + } break; + } case 0x20 ... 0x23: { @@ -567,6 +612,7 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size) case 0xE1: set_texture(psx_gpu, list[0] & 0x1FF); + if(list[0] & (1 << 9)) psx_gpu->render_state_base |= RENDER_STATE_DITHER; else -- 2.39.5