From 6c4a10c497c1262acc05b15ffacb57acfffa409c Mon Sep 17 00:00:00 2001 From: Exophase Date: Wed, 4 Jan 2012 03:01:19 +0200 Subject: [PATCH] psx_gpu: change fill handling again --- plugins/gpu_neon/psx_gpu/psx_gpu.c | 46 ++++++++++----- plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S | 63 ++++++--------------- 2 files changed, 50 insertions(+), 59 deletions(-) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index 76bfb157..7c1503bc 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -4322,30 +4322,48 @@ void render_block_fill(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y, flush_render_block_buffer(psx_gpu); invalidate_texture_cache_region(psx_gpu, x, y, x + width - 1, y + height - 1); -#ifndef NEON_BUILD u32 r = color & 0xFF; u32 g = (color >> 8) & 0xFF; u32 b = (color >> 16) & 0xFF; - u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10); + u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10) | + psx_gpu->mask_msb; + u32 color_32bpp = color_16bpp | (color_16bpp << 16); - u16 *vram_ptr = psx_gpu->vram_ptr + x + (y * 1024); - u32 draw_x, draw_y; + u32 *vram_ptr = (u32 *)(psx_gpu->vram_ptr + x + (y * 1024)); - for(draw_y = 0; draw_y < height; draw_y++) + u32 pitch = 512 - (width / 2); + u32 num_width; + + if(psx_gpu->interlace_mode & RENDER_INTERLACE_ENABLED) { - for(draw_x = 0; draw_x < width; draw_x++) + pitch += 512; + height /= 2; + + if(psx_gpu->interlace_mode & RENDER_INTERLACE_ODD) + vram_ptr += 512; + } + + while(height) + { + num_width = width; + while(num_width) { - vram_ptr[draw_x] = color_16bpp; + vram_ptr[0] = color_32bpp; + vram_ptr[1] = color_32bpp; + vram_ptr[2] = color_32bpp; + vram_ptr[3] = color_32bpp; + vram_ptr[4] = color_32bpp; + vram_ptr[5] = color_32bpp; + vram_ptr[6] = color_32bpp; + vram_ptr[7] = color_32bpp; + + vram_ptr += 8; + num_width -= 16; } - vram_ptr += 1024; + vram_ptr += pitch; + height--; } -#else - void render_block_fill_body(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y, - u32 width, u32 height); - - render_block_fill_body(psx_gpu, color, x, y, width, height); -#endif } void render_block_copy(psx_gpu_struct *psx_gpu, u16 *source, u32 x, u32 y, diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S index 973a8b36..11a11b1e 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S @@ -4337,31 +4337,20 @@ function(warmup) bx lr +#undef vram_ptr #undef color -#undef y +#undef width #undef height - -#define psx_gpu r0 -#define color r1 -#define x r2 -#define y r3 +#undef pitch #define vram_ptr r0 -#define width r3 -#define height r12 - -#define parameter_width_offset 0 -#define parameter_height_offset 4 +#define color r1 +#define width r2 +#define height r3 -#define color_r r14 -#define color_g r4 -#define color_b r5 +#define pitch r1 -#define left_unaligned r14 -#define right_unaligned r4 -#define pitch r5 -#define num_unaligned r2 -#define num_width r6 +#define num_width r12 #undef colors_a #undef colors_b @@ -4372,44 +4361,28 @@ function(warmup) .align 3 function(render_block_fill_body) - ldr vram_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ] - ldr height, [ sp, #parameter_height_offset ] - - add vram_ptr, vram_ptr, y, lsl #11 - ldr width, [ sp, #parameter_width_offset ] - - add vram_ptr, vram_ptr, x, lsl #1 - stmdb sp!, { r4 - r6, r14 } - - ubfx color_r, color, #3, #5 - ubfx color_g, color, #11, #5 - - ubfx color_b, color, #19, #5 - orr color, color_r, color_g, lsl #5 - - orr color, color, color_b, lsl #10 vdup.u16 colors_a, color + mov pitch, #2048 vmov colors_b, colors_a - mov pitch, #2048 sub pitch, pitch, width, lsl #1 - 0: - mov num_width, width, lsr #4 + mov num_width, width - 1: - vst1.u32 { colors_a, colors_b }, [ vram_ptr, :128 ]! + 0: + vst1.u32 { colors_a, colors_b }, [ vram_ptr, :256 ]! - subs num_width, num_width, #1 - bne 1b + subs num_width, num_width, #2 + bne 0b add vram_ptr, vram_ptr, pitch + mov num_width, width + subs height, height, #1 bne 0b - - 1: - ldmia sp!, { r4 - r6, pc } + bx lr + #undef x #undef y -- 2.39.2