From: notaz Date: Sat, 27 Oct 2012 19:14:16 +0000 (+0300) Subject: psx_gpu: implement setup_sprite_untextured in asm X-Git-Tag: r16~15^2~5 X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f0931e56b2428fe5e0f6b4d7d6d0f41462cfc551;p=pcsx_rearmed.git psx_gpu: implement setup_sprite_untextured in asm --- diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index ce72af55..485ef27a 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -3885,6 +3885,11 @@ void setup_sprite_8bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, s32 width, s32 height, u32 color); +void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, + s32 v, s32 width, s32 height, u32 color); +void setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y, + s32 u, s32 v, s32 width, s32 height, u32 color); + #ifndef NEON_BUILD setup_sprite_tiled_builder(4bpp,); setup_sprite_tiled_builder(8bpp,); @@ -4013,11 +4018,16 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, } } -#endif - void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, s32 width, s32 height, u32 color) { + if((psx_gpu->render_state & (RENDER_STATE_MASK_EVALUATE | + RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND)) == 0) + { + setup_sprite_untextured_simple(psx_gpu, x, y, u, v, width, height, color); + return; + } + u32 right_width = ((width - 1) & 0x7) + 1; u32 right_mask_bits = (0xFF << right_width); u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + x; @@ -4083,6 +4093,66 @@ void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, } } +#endif + +void setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y, + s32 u, s32 v, s32 width, s32 height, u32 color) +{ + u32 r = color & 0xFF; + u32 g = (color >> 8) & 0xFF; + u32 b = (color >> 16) & 0xFF; + u32 
color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10) | + psx_gpu->mask_msb; + u32 color_32bpp = color_16bpp | (color_16bpp << 16); + + u16 *vram_ptr16 = psx_gpu->vram_out_ptr + x + (y * 1024); + u32 *vram_ptr; + + u32 num_width; + + if(psx_gpu->num_blocks > MAX_BLOCKS) + { + flush_render_block_buffer(psx_gpu); + } + + while(height) + { + num_width = width; + + vram_ptr = (void *)vram_ptr16; + if((long)vram_ptr16 & 2) + { + *vram_ptr16 = color_32bpp; + vram_ptr = (void *)(vram_ptr16 + 1); + num_width--; + } + + while(num_width >= 4 * 2) + { + vram_ptr[0] = color_32bpp; + vram_ptr[1] = color_32bpp; + vram_ptr[2] = color_32bpp; + vram_ptr[3] = color_32bpp; + + vram_ptr += 4; + num_width -= 4 * 2; + } + + while(num_width >= 2) + { + *vram_ptr++ = color_32bpp; + num_width -= 2; + } + + if(num_width > 0) + { + *(u16 *)vram_ptr = color_32bpp; + } + + vram_ptr16 += 1024; + height--; + } +} #define setup_sprite_blocks_switch_textured(texture_mode) \ diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S index 103483a8..085e11b0 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S @@ -17,6 +17,10 @@ #define MAX_BLOCKS 64 #define MAX_BLOCKS_PER_ROW 128 +#define RENDER_STATE_MASK_EVALUATE 0x20 +#define RENDER_FLAGS_MODULATE_TEXELS 0x1 +#define RENDER_FLAGS_BLEND 0x2 + #include "psx_gpu_offsets.h" #define psx_gpu_b_dx_offset (psx_gpu_b_block_span_offset + 4) @@ -5687,6 +5691,145 @@ function(setup_sprite_16bpp_4x) ldmia sp!, { r4 - r11, pc } +#undef width +#undef right_width +#undef right_mask_bits +#undef color +#undef height +#undef blocks_remaining +#undef colors +#undef right_mask +#undef test_mask +#undef draw_mask + +#define psx_gpu r0 +#define x r1 +#define y r2 +#define width r3 +#define right_width r5 +#define right_mask_bits r6 +#define fb_ptr r7 +#define color r8 +#define height r9 +#define fb_ptr_pitch r12 + +// referenced by setup_sprites_16bpp_flush +#define 
// referenced by setup_sprites_16bpp_flush
#define num_blocks                                r4
#define block                                     r5
#define block_width                               r11

#define color_r                                   r1
#define color_g                                   r2
#define color_b                                   r8
#define blocks_remaining                          r6

#define colors                                    q0
#define right_mask                                q1
#define test_mask                                 q2
#define draw_mask                                 q2
#define draw_mask_bits_fb_ptr                     d6


.align 3

/*
 * setup_sprite_untextured(psx_gpu, x, y, u, v, width, height, color)
 *
 * If none of RENDER_STATE_MASK_EVALUATE, RENDER_FLAGS_MODULATE_TEXELS or
 * RENDER_FLAGS_BLEND is set in render_state, branches straight to
 * setup_sprite_untextured_simple; no argument register or stack slot has
 * been touched at that point, so it sees the original arguments.
 *
 * Otherwise the rectangle is queued as render blocks, row by row, one block
 * per 8 pixels: block_width = (width + 7) >> 3 blocks per row, the last of
 * which carries right_mask instead of a zero mask.
 *
 * Block records are 64 bytes apart (num_blocks is scaled by lsl #6). This
 * routine writes, per block:
 *   +0   16 bytes  draw mask (zero, or right_mask for the last block in a row)
 *   +16  16 bytes  colors (the 15-bit color replicated into eight u16 lanes)
 *   +40   8 bytes  d6: low word, then fb_ptr in the high word
 * Bytes 32..39 and 48..63 of the record are not written here.
 *
 * The instruction order matters: several aliases share one physical register
 * (x/color_r = r1, y/color_g = r2, u/width = r3, right_width/block = r5,
 * right_mask_bits/blocks_remaining = r6, color/color_b = r8,
 * test_mask/draw_mask = q2), so each value has to be consumed before its
 * alias is written.
 *
 * NOTE(review): unlike setup_sprite_untextured_simple, mask_msb is not merged
 * into the color here - presumably applied later by the block renderer;
 * confirm.
 */
function(setup_sprite_untextured)
  ldrh r12, [ psx_gpu, #psx_gpu_render_state_offset ]
  /* any of the three flags set -> take the block path below */
  tst r12, #(RENDER_STATE_MASK_EVALUATE | RENDER_FLAGS_MODULATE_TEXELS       \
    | RENDER_FLAGS_BLEND)
  beq setup_sprite_untextured_simple

  stmdb sp!, { r4 - r11, r14 }                  // 9 registers = 36 bytes

  // Stack arguments now sit 36 bytes higher: going by the C prototype's
  // argument order (and assuming the standard ARM calling convention),
  // v is at +36, width at +40, height at +44 and color at +48.
  ldr width, [ sp, #40 ]                        // overwrites u (r3)
  ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ]

  ldr height, [ sp, #44 ]
  add fb_ptr, fb_ptr, y, lsl #11                // + y * 2048 bytes per row

  add fb_ptr, fb_ptr, x, lsl #1                 // + x * 2 bytes per pixel
  sub right_width, width, #1

  ldr color, [ sp, #48 ]
  and right_width, #7

  add block_width, width, #7
  add right_width, #1                           // ((width - 1) & 7) + 1

  lsr block_width, #3                           // (width + 7) / 8
  mov right_mask_bits, #0xff

  sub fb_ptr_pitch, block_width, #1
  lsl right_mask_bits, right_width              // 0xff << right_width

  lsl fb_ptr_pitch, #3+1                        // (block_width - 1) * 16 bytes
  ubfx color_r, color, #3, #5                   // top 5 bits of byte 0; x is dead

  rsb fb_ptr_pitch, #1024*2                     // 2048 - bytes stepped in a row
  ubfx color_g, color, #11, #5                  // top 5 bits of byte 1; y is dead

  // Loads the 16 bytes at the very start of the psx_gpu structure.
  // NOTE(review): presumably a per-lane single-bit pattern, so that the vtst
  // below turns right_mask_bits into a per-pixel lane mask - confirm against
  // the structure layout.
  vld1.u32 { test_mask }, [ psx_gpu, :128 ]
  ubfx color_b, color, #19, #5                  // overwrites color (same r8)

  vdup.u16 right_mask, right_mask_bits
  orr color, color_r, color_b, lsl #10

  ldrh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ]
  orr color, color, color_g, lsl #5             // r | (g << 5) | (b << 10)

  vtst.u16 right_mask, right_mask, test_mask
  add block, psx_gpu, #psx_gpu_blocks_offset

  vdup.u16 colors, color
  add block, block, num_blocks, lsl #6          // first free 64-byte record


// One iteration per row of the rectangle.
setup_sprite_untextured_height_loop:
  add num_blocks, block_width
  sub blocks_remaining, block_width, #1         // full blocks before the last

  // Not enough room for this row's blocks.
  // NOTE(review): setup_sprites_16bpp_flush is defined elsewhere; presumably
  // it renders the queue and resets num_blocks/block - confirm.
  cmp num_blocks, #MAX_BLOCKS
  blgt setup_sprites_16bpp_flush

  cmp blocks_remaining, #0
  ble 1f                                        // row fits in one block

  // NOTE(review): d6 is zeroed only on this path. When the branch above is
  // taken on every row (block_width <= 1), the low word stored at record
  // offset 40 is whatever d6 already held - confirm that consumers of these
  // blocks ignore that word.
  vmov.u8 draw_mask, #0                         /* zero_mask */
  vmov.u8 draw_mask_bits_fb_ptr, #0

 0:
  vst1.u32 { draw_mask }, [ block, :128 ]!      // +0, block advances by 16
  subs blocks_remaining, #1                     // flags consumed by bgt 0b

  vst1.u32 { colors }, [ block, :128 ]          // +16
  add block, block, #24                         // -> +40

  vmov.u32 draw_mask_bits_fb_ptr[1], fb_ptr     // high word of d6 = fb_ptr
  vst1.u32 { draw_mask_bits_fb_ptr }, [ block, :64 ]

  add block, block, #24                         // -> next record (+64)
  add fb_ptr, #8*2                              // 8 pixels to the right
  bgt 0b

 1:
  // Last block of the row: same layout, but with the right-edge mask.
  vst1.u32 { right_mask }, [ block, :128 ]!
  subs height, #1                               // flags consumed by final bgt

  vst1.u32 { colors }, [ block, :128 ]
  add block, block, #24

  vmov.u32 draw_mask_bits_fb_ptr[1], fb_ptr
  vst1.u32 { draw_mask_bits_fb_ptr }, [ block, :64 ]

  add block, block, #24
  add fb_ptr, fb_ptr_pitch                      // start of the next row

  strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ]
  bgt setup_sprite_untextured_height_loop

  ldmia sp!, { r4 - r11, pc }                   // restore and return



#undef texture_page_ptr
#undef vram_ptr
#undef dirty_textures_mask