X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=blobdiff_plain;f=plugins%2Fgpu_neon%2Fpsx_gpu%2Fpsx_gpu_arm_neon.S;h=4e1e4032dcf3fbb759f4b0f3cc54fd59325de42c;hp=085e11b07f258a4b5999bc0481794d045fb0f9b1;hb=b7569147823a8fc5a9de98e5d491da906e119296;hpb=f0931e56b2428fe5e0f6b4d7d6d0f41462cfc551 diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S index 085e11b0..4e1e4032 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S @@ -1175,6 +1175,10 @@ function(setup_spans_up_down) ldrh temp, [ psx_gpu, #psx_gpu_num_spans_offset ] add temp, temp, height_minor_b + + cmp temp, #MAX_SPANS + beq 5f + strh temp, [ psx_gpu, #psx_gpu_num_spans_offset ] 2: @@ -1190,6 +1194,15 @@ function(setup_spans_up_down) setup_spans_prologue_b() bal 4b + 5: + // FIXME: overflow corner case + sub temp, temp, height_minor_b + bics height_minor_b, #3 + add temp, temp, height_minor_b + strh temp, [ psx_gpu, #psx_gpu_num_spans_offset ] + bne 2b + bal 1b + .pool #undef span_uvrg_offset @@ -3392,10 +3405,12 @@ function(shade_blocks_textured_unmodulated_direct) [ draw_mask_bits_ptr, :16 ], c_64 vbif.u16 fb_pixels, pixels, draw_mask_combined - vld1.u32 { pixels }, [ block_ptr_load, :128 ], c_64 - sub fb_ptr_cmp, fb_ptr_next, fb_ptr + pld [ fb_ptr_next, #64 ] + add fb_ptr_cmp, fb_ptr_cmp, #14 + vld1.u32 { pixels }, [ block_ptr_load, :128 ], c_64 + cmp fb_ptr_cmp, #28 bls 4f @@ -3754,11 +3769,15 @@ function(blend_blocks_textured_add_##mask_evaluate) \ vorr.u16 blend_pixels, fb_pixels_rb, fb_pixels_g; \ vand.u16 pixels_mg, pixels, d128_0x83E0; \ \ - vbit.u16 blend_pixels, fb_pixels, draw_mask; \ - vld1.u32 { draw_mask }, [ draw_mask_ptr, :128 ], c_64; \ + sub fb_ptr_cmp, fb_ptr_next, fb_ptr; \ + pld [ fb_ptr_next, #64 ]; \ \ sub fb_ptr_cmp, fb_ptr_next, fb_ptr; \ + vbit.u16 blend_pixels, fb_pixels, draw_mask; \ + \ add fb_ptr_cmp, fb_ptr_cmp, #14; \ + vld1.u32 { draw_mask }, [ draw_mask_ptr, :128 ], c_64; \ + \ cmp fb_ptr_cmp, #28; \ bls 2f; \ \ @@ -4917,12 +4936,12 @@ setup_sprite_update_texture_8bpp_cache: draw_mask_fb_ptr_left_b); \ \ add texture_block_ptr, texture_page_ptr, texture_block_ptr; \ - add fb_ptr, fb_ptr, #16*2; \ + pld [ fb_ptr, #2048 ]; \ \ vld1.u32 { texels }, [ texture_block_ptr, :64 ]; \ - vtbl.8 texels_low, { clut_low_a, clut_low_b }, texels; \ + add fb_ptr, fb_ptr, #16*2; \ \ - pld [ fb_ptr ]; \ + vtbl.8 texels_low, { clut_low_a, clut_low_b }, texels; \ vtbl.8 texels_high, { clut_high_a, clut_high_b }, texels; \ \ vzip.8 texels_low, texels_high; \ @@ -4961,9 +4980,10 @@ setup_sprite_update_texture_8bpp_cache: do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_##edge##_a, \ draw_mask_fb_ptr_##edge##_b); \ \ + pld [ fb_ptr, #2048 ]; \ add fb_ptr, fb_ptr, #2048 * 2; \ - subs sub_tile_height, sub_tile_height, #1; \ \ + subs sub_tile_height, sub_tile_height, #1; \ bne 4b; \ \ ldr column_data, [sp], #8; /* fb_ptr2 */ \ @@ -4987,13 +5007,13 @@ setup_sprite_update_texture_8bpp_cache: do_texture_block_8bpp_4x(fb_ptr2, draw_mask_fb_ptr_left_a, \ draw_mask_fb_ptr_left_b); \ \ + pld [ fb_ptr, #2048 ]; \ and texture_block_ptr, texture_block_ptr, texture_mask; \ \ add fb_ptr, fb_ptr, #16*2; \ add texture_block_ptr, texture_page_ptr, texture_block_ptr; \ \ vld1.u32 { texels }, [ texture_block_ptr, :64 ]; \ - pld [ fb_ptr ]; \ \ do_texture_block_8bpp_4x(fb_ptr2, draw_mask_fb_ptr_right_a, \ draw_mask_fb_ptr_right_b); \ @@ -5022,6 +5042,7 @@ setup_sprite_update_texture_8bpp_cache: add texture_block_ptr, texture_page_ptr, texture_block_ptr; \ vld1.u32 { texels }, [ texture_block_ptr, :64 ]; \ \ + pld [ fb_ptr, #2048 ]; \ do_texture_block_8bpp_4x(fb_ptr2, draw_mask_fb_ptr_##edge##_a, \ draw_mask_fb_ptr_##edge##_b); \ \