From 8438c3c78159bd3986560e30bfe97b7bb91f8cc4 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 27 Oct 2012 22:18:36 +0300 Subject: [PATCH] psx_gpu: add some preloads seem to be helpful for enhancement --- plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S | 26 ++++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S index 085e11b0..a2bfa5b5 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S @@ -3392,10 +3392,12 @@ function(shade_blocks_textured_unmodulated_direct) [ draw_mask_bits_ptr, :16 ], c_64 vbif.u16 fb_pixels, pixels, draw_mask_combined - vld1.u32 { pixels }, [ block_ptr_load, :128 ], c_64 - sub fb_ptr_cmp, fb_ptr_next, fb_ptr + pld [ fb_ptr_next, #64 ] + add fb_ptr_cmp, fb_ptr_cmp, #14 + vld1.u32 { pixels }, [ block_ptr_load, :128 ], c_64 + cmp fb_ptr_cmp, #28 bls 4f @@ -3754,11 +3756,15 @@ function(blend_blocks_textured_add_##mask_evaluate) \ vorr.u16 blend_pixels, fb_pixels_rb, fb_pixels_g; \ vand.u16 pixels_mg, pixels, d128_0x83E0; \ \ - vbit.u16 blend_pixels, fb_pixels, draw_mask; \ - vld1.u32 { draw_mask }, [ draw_mask_ptr, :128 ], c_64; \ + sub fb_ptr_cmp, fb_ptr_next, fb_ptr; \ + pld [ fb_ptr_next, #64 ]; \ \ sub fb_ptr_cmp, fb_ptr_next, fb_ptr; \ + vbit.u16 blend_pixels, fb_pixels, draw_mask; \ + \ add fb_ptr_cmp, fb_ptr_cmp, #14; \ + vld1.u32 { draw_mask }, [ draw_mask_ptr, :128 ], c_64; \ + \ cmp fb_ptr_cmp, #28; \ bls 2f; \ \ @@ -4917,12 +4923,12 @@ setup_sprite_update_texture_8bpp_cache: draw_mask_fb_ptr_left_b); \ \ add texture_block_ptr, texture_page_ptr, texture_block_ptr; \ - add fb_ptr, fb_ptr, #16*2; \ + pld [ fb_ptr, #2048 ]; \ \ vld1.u32 { texels }, [ texture_block_ptr, :64 ]; \ - vtbl.8 texels_low, { clut_low_a, clut_low_b }, texels; \ + add fb_ptr, fb_ptr, #16*2; \ \ - pld [ fb_ptr ]; \ + vtbl.8 texels_low, { clut_low_a, clut_low_b }, texels; \ vtbl.8 texels_high, { clut_high_a, clut_high_b }, texels; \ \ vzip.8 texels_low, texels_high; \ @@ -4961,9 +4967,10 @@ setup_sprite_update_texture_8bpp_cache: do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_##edge##_a, \ draw_mask_fb_ptr_##edge##_b); \ \ + pld [ fb_ptr, #2048 ]; \ add fb_ptr, fb_ptr, #2048 * 2; \ - subs sub_tile_height, sub_tile_height, #1; \ \ + subs sub_tile_height, sub_tile_height, #1; \ bne 4b; \ \ ldr column_data, [sp], #8; /* fb_ptr2 */ \ @@ -4987,13 +4994,13 @@ setup_sprite_update_texture_8bpp_cache: do_texture_block_8bpp_4x(fb_ptr2, draw_mask_fb_ptr_left_a, \ draw_mask_fb_ptr_left_b); \ \ + pld [ fb_ptr, #2048 ]; \ and texture_block_ptr, texture_block_ptr, texture_mask; \ \ add fb_ptr, fb_ptr, #16*2; \ add texture_block_ptr, texture_page_ptr, texture_block_ptr; \ \ vld1.u32 { texels }, [ texture_block_ptr, :64 ]; \ - pld [ fb_ptr ]; \ \ do_texture_block_8bpp_4x(fb_ptr2, draw_mask_fb_ptr_right_a, \ draw_mask_fb_ptr_right_b); \ @@ -5022,6 +5029,7 @@ setup_sprite_update_texture_8bpp_cache: add texture_block_ptr, texture_page_ptr, texture_block_ptr; \ vld1.u32 { texels }, [ texture_block_ptr, :64 ]; \ \ + pld [ fb_ptr, #2048 ]; \ do_texture_block_8bpp_4x(fb_ptr2, draw_mask_fb_ptr_##edge##_a, \ draw_mask_fb_ptr_##edge##_b); \ \ -- 2.39.5