X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=plugins%2Fgpu_neon%2Fpsx_gpu%2Fpsx_gpu_arm_neon.S;h=c62c1baa6088af7ab4588f9250202380086addd3;hb=b0d96051c9f087c22922966c651384c3ee84eee0;hp=7c820d273cecd041e709df8b0e0ad18a0391774b;hpb=0e4ad31902f206e2c6945632bb1f558eae941ff1;p=pcsx_rearmed.git diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S index 7c820d27..c62c1baa 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S @@ -34,6 +34,16 @@ .syntax unified .text +#if 0 +#define save_abi_regs() \ + vpush {q4-q7} +#define restore_abi_regs() \ + vpop {q4-q7} +#else +#define save_abi_regs() +#define restore_abi_regs() +#endif + #define psx_gpu r0 #define v_a r1 #define v_b r2 @@ -233,6 +243,7 @@ function(compute_all_gradients) @ r12 = psx_gpu->triangle_area ldr r12, [psx_gpu, #psx_gpu_triangle_area_offset] stmdb sp!, { r4 - r11, lr } + save_abi_regs() @ load exponent of 62 into upper half of double movw r4, #0 @@ -358,8 +369,8 @@ function(compute_all_gradients) sub r14, r14, #(62 - 12) @ r14 = shift - (62 - FIXED_BITS) vshll.u16 uvrg_base, uvrg0, #16 @ uvrg_base = uvrg0 << 16 - vdup.u32 r_shift, r14 @ r_shift = { shift, shift, shift, shift } - + vdup.u32 r_shift, r14 @ r_shift = { shift, shift*, shift, shift* } + @ * - vshl.u64: ignored by hw vadd.u32 uvrg_base, uvrgb_phase vabs.s32 ga_uvrg_x, ga_uvrg_x @ ga_uvrg_x = abs(ga_uvrg_x) @@ -448,6 +459,7 @@ function(compute_all_gradients) stmia store_b, { g_bx0, g_bx, g_bx2, g_bx3, b_base, g_by } + restore_abi_regs() ldmia sp!, { r4 - r11, pc } @@ -578,6 +590,7 @@ function(compute_all_gradients) #define setup_spans_prologue() \ stmdb sp!, { r4 - r11, lr }; \ + save_abi_regs(); \ \ ldrsh x_a, [v_a, #8]; \ ldrsh x_b, [v_b, #8]; \ @@ -974,6 +987,7 @@ function(compute_all_gradients) #define setup_spans_epilogue() \ + restore_abi_regs(); \ ldmia sp!, { r4 - r11, pc } \ @@ -1348,6 +1362,7 @@ function(setup_blocks_shaded_textured_dithered_##swizzling##_indirect) \ bxeq lr; \ \ stmdb sp!, { r4 - r11, r14 }; \ + save_abi_regs(); \ vshl.u32 uvrg_dx4, uvrg_dx, #2; \ \ ldr b_dx, [psx_gpu, #psx_gpu_b_dx_offset]; \ @@ -1577,6 +1592,7 @@ function(setup_blocks_shaded_textured_dithered_##swizzling##_indirect) \ strh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \ bne 0b; \ \ + restore_abi_regs(); \ ldmia sp!, { r4 - r11, pc }; \ \ 2: \ @@ -1584,9 +1600,9 @@ function(setup_blocks_shaded_textured_dithered_##swizzling##_indirect) \ vpush { texture_mask }; \ vpush { uvrg_dx4 }; \ \ - stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \ + stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12 }; /* r14=num_blocks */ \ bl flush_render_block_buffer; \ - ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \ + ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12 }; \ \ vpop { uvrg_dx4 }; \ vpop { texture_mask }; \ @@ -1617,6 +1633,7 @@ function(setup_blocks_unshaded_textured_dithered_##swizzling##_indirect) \ bxeq lr; \ \ stmdb sp!, { r4 - r11, r14 }; \ + save_abi_regs(); \ vshl.u32 uvrg_dx4, uvrg_dx, #2; \ \ vshl.u32 uvrg_dx8, uvrg_dx, #3; \ @@ -1774,6 +1791,7 @@ function(setup_blocks_unshaded_textured_dithered_##swizzling##_indirect) \ strh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \ bne 0b; \ \ + restore_abi_regs(); \ ldmia sp!, { r4 - r11, pc }; \ \ 2: \ @@ -1781,9 +1799,9 @@ function(setup_blocks_unshaded_textured_dithered_##swizzling##_indirect) \ vpush { texture_mask }; \ vpush { uvrg_dx4 }; \ \ - stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \ + stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12 }; /* r14=num_blocks */ \ bl flush_render_block_buffer; \ - ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \ + ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12 }; \ \ vpop { uvrg_dx4 }; \ vpop { texture_mask }; \ @@ -1810,6 +1828,7 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_indirect) bxeq lr stmdb sp!, { r4 - r11, r14 } + save_abi_regs() vld1.u32 { test_mask }, [psx_gpu, :128] ldr color, [psx_gpu, #psx_gpu_triangle_color_offset] @@ -1892,6 +1911,7 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_indirect) strh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset] bne 0b + restore_abi_regs() ldmia sp!, { r4 - r11, pc } 2: @@ -2114,6 +2134,7 @@ function(setup_blocks_shaded_untextured_##dithering##_unswizzled_indirect) \ bxeq lr; \ \ stmdb sp!, { r4 - r11, r14 }; \ + save_abi_regs(); \ vshl.u32 rg_dx4, rg_dx, #2; \ \ ldr b_dx, [psx_gpu, #psx_gpu_b_dx_offset]; \ @@ -2306,6 +2327,7 @@ function(setup_blocks_shaded_untextured_##dithering##_unswizzled_indirect) \ strh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \ bne 0b; \ \ + restore_abi_regs(); \ ldmia sp!, { r4 - r11, pc }; \ \ 2: \ @@ -2357,6 +2379,7 @@ function(setup_blocks_shaded_untextured_##dithering##_unswizzled_direct) \ bxeq lr; \ \ stmdb sp!, { r4 - r11, r14 }; \ + save_abi_regs(); \ vshl.u32 rg_dx4, rg_dx, #2; \ \ ldr b_dx, [psx_gpu, #psx_gpu_b_dx_offset]; \ @@ -2577,6 +2600,7 @@ function(setup_blocks_shaded_untextured_##dithering##_unswizzled_direct) \ \ bne 0b; \ \ + restore_abi_regs(); \ ldmia sp!, { r4 - r11, pc } \ setup_blocks_shaded_untextured_direct_builder(undithered) @@ -3150,6 +3174,7 @@ function(texture_blocks_16bpp) .align 3; \ \ function(shade_blocks_##shading##_textured_modulated_##dithering##_##target) \ + save_abi_regs(); \ shade_blocks_textured_modulated_prologue_##shading(dithering, target); \ stmdb sp!, { r4 - r5, lr }; \ ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \ @@ -3267,7 +3292,9 @@ function(shade_blocks_##shading##_textured_modulated_##dithering##_##target) \ shade_blocks_textured_modulated_store_draw_mask_##target(28); \ shade_blocks_textured_modulated_store_pixels_##target(); \ \ - ldmia sp!, { r4 - r5, pc } \ + ldmia sp!, { r4 - r5, lr }; \ + restore_abi_regs(); \ + bx lr \ shade_blocks_textured_modulated_builder(shaded, dithered, direct); @@ -3332,7 +3359,8 @@ shade_blocks_textured_modulated_builder(unshaded, undithered, indirect); .align 3 function(shade_blocks_textured_unmodulated_indirect) - str r14, [sp, #-4] + stmdb sp!, { r4, r14 } + save_abi_regs() add draw_mask_bits_ptr, psx_gpu, #(psx_gpu_blocks_offset + 40) ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset] @@ -3375,13 +3403,15 @@ function(shade_blocks_textured_unmodulated_indirect) vorr.u16 draw_mask_combined, draw_mask, zero_mask vst1.u32 { draw_mask_combined }, [draw_mask_store_ptr, :128], c_64 - ldr pc, [sp, #-4] + restore_abi_regs() + ldmia sp!, { r4, pc } .align 3 function(shade_blocks_textured_unmodulated_direct) stmdb sp!, { r4, r14 } + save_abi_regs() add draw_mask_bits_ptr, psx_gpu, #(psx_gpu_blocks_offset + 40) ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset] @@ -3443,6 +3473,7 @@ function(shade_blocks_textured_unmodulated_direct) vst1.u16 { fb_pixels_next }, [fb_ptr_next] + restore_abi_regs() ldmia sp!, { r4, pc } 4: @@ -3462,6 +3493,7 @@ function(shade_blocks_unshaded_untextured_indirect) function(shade_blocks_unshaded_untextured_direct) stmdb sp!, { r4, r14 } + save_abi_regs() add draw_mask_ptr, psx_gpu, #psx_gpu_blocks_offset ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset] @@ -3508,6 +3540,7 @@ function(shade_blocks_unshaded_untextured_direct) vbif.u16 fb_pixels_next, pixels, draw_mask vst1.u16 { fb_pixels_next }, [fb_ptr_next] + restore_abi_regs() ldmia sp!, { r4, pc } 4: @@ -3613,6 +3646,7 @@ function(shade_blocks_unshaded_untextured_direct) \ function(blend_blocks_##texturing##_average_##mask_evaluate) \ stmdb sp!, { r4, r14 }; \ + save_abi_regs(); \ add mask_msb_ptr, psx_gpu, #psx_gpu_mask_msb_offset; \ ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \ \ @@ -3694,6 +3728,7 @@ function(blend_blocks_##texturing##_average_##mask_evaluate) \ vbif.u16 fb_pixels_next, blend_pixels, draw_mask_next; \ vst1.u16 { fb_pixels_next }, [fb_ptr_next]; \ \ + restore_abi_regs(); \ ldmia sp!, { r4, pc }; \ \ 2: \ @@ -3732,6 +3767,7 @@ blend_blocks_average_builder(untextured, on) \ function(blend_blocks_textured_add_##mask_evaluate) \ stmdb sp!, { r4, r14 }; \ + save_abi_regs(); \ add mask_msb_ptr, psx_gpu, #psx_gpu_mask_msb_offset; \ ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \ \ @@ -3817,6 +3853,7 @@ function(blend_blocks_textured_add_##mask_evaluate) \ vbit.u16 blend_pixels, fb_pixels, draw_mask; \ vst1.u16 { blend_pixels }, [fb_ptr_next]; \ \ + restore_abi_regs(); \ ldmia sp!, { r4, pc }; \ \ 2: \ @@ -3836,6 +3873,7 @@ function(blend_blocks_textured_add_##mask_evaluate) \ \ function(blend_blocks_untextured_add_##mask_evaluate) \ stmdb sp!, { r4, r14 }; \ + save_abi_regs(); \ add mask_msb_ptr, psx_gpu, #psx_gpu_mask_msb_offset; \ ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \ \ @@ -3911,6 +3949,7 @@ function(blend_blocks_untextured_add_##mask_evaluate) \ vbit.u16 blend_pixels, fb_pixels, draw_mask; \ vst1.u16 { blend_pixels }, [fb_ptr_next]; \ \ + restore_abi_regs(); \ ldmia sp!, { r4, pc }; \ \ 2: \ @@ -3935,7 +3974,7 @@ blend_blocks_add_untextured_builder(on) #define blend_blocks_subtract_combine_textured() \ vbif.u16 blend_pixels, pixels, blend_mask \ -#define blend_blocks_subtract_set_stb_textured() \ +#define blend_blocks_subtract_set_stp_textured() \ vorr.u16 blend_pixels, #0x8000 \ #define blend_blocks_subtract_msb_mask_textured() \ @@ -3945,7 +3984,7 @@ blend_blocks_add_untextured_builder(on) #define blend_blocks_subtract_combine_untextured() \ -#define blend_blocks_subtract_set_stb_untextured() \ +#define blend_blocks_subtract_set_stp_untextured() \ vorr.u16 blend_pixels, blend_pixels, msb_mask \ #define blend_blocks_subtract_msb_mask_untextured() \ @@ -3968,6 +4007,7 @@ blend_blocks_add_untextured_builder(on) \ function(blend_blocks_##texturing##_subtract_##mask_evaluate) \ stmdb sp!, { r4, r14 }; \ + save_abi_regs(); \ add mask_msb_ptr, psx_gpu, #psx_gpu_mask_msb_offset; \ ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \ \ @@ -4010,7 +4050,7 @@ function(blend_blocks_##texturing##_subtract_##mask_evaluate) \ vld1.u32 { pixels_next }, [pixel_ptr, :128], c_64; \ vorr.u16 blend_pixels, fb_pixels_rb, fb_pixels_g; \ vand.u16 pixels_rb, pixels_next, d128_0x7C1F; \ - blend_blocks_subtract_set_stb_##texturing(); \ + blend_blocks_subtract_set_stp_##texturing(); \ vand.u16 pixels_g, pixels_next, d128_0x03E0; \ blend_blocks_subtract_combine_##texturing(); \ blend_blocks_subtract_set_blend_mask_##texturing(); \ @@ -4038,11 +4078,12 @@ function(blend_blocks_##texturing##_subtract_##mask_evaluate) \ \ blend_blocks_subtract_msb_mask_##texturing(); \ vorr.u16 blend_pixels, fb_pixels_rb, fb_pixels_g; \ - blend_blocks_subtract_set_stb_##texturing(); \ + blend_blocks_subtract_set_stp_##texturing(); \ blend_blocks_subtract_combine_##texturing(); \ vbit.u16 blend_pixels, fb_pixels, draw_mask; \ vst1.u16 { blend_pixels }, [fb_ptr_next]; \ \ + restore_abi_regs(); \ ldmia sp!, { r4, pc }; \ \ 2: \ @@ -4067,6 +4108,7 @@ blend_blocks_subtract_builder(untextured, on) \ function(blend_blocks_textured_add_fourth_##mask_evaluate) \ stmdb sp!, { r4, r14 }; \ + save_abi_regs(); \ add mask_msb_ptr, psx_gpu, #psx_gpu_mask_msb_offset; \ ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \ \ @@ -4110,6 +4152,7 @@ function(blend_blocks_textured_add_fourth_##mask_evaluate) \ ldr fb_ptr_next, [pixel_ptr, #28]; \ \ vorr.u16 blend_pixels, fb_pixels_rb, fb_pixels_g; \ + vorr.u16 blend_pixels, #0x8000; /* stp */ \ vbif.u16 blend_pixels, pixels, blend_mask; \ \ vld1.u32 { pixels }, [pixel_ptr, :128], c_64; \ @@ -4145,11 +4188,13 @@ function(blend_blocks_textured_add_fourth_##mask_evaluate) \ \ 1: \ vorr.u16 blend_pixels, fb_pixels_rb, fb_pixels_g; \ - vorr.u16 blend_pixels, blend_pixels, msb_mask; \ + vorr.u16 blend_pixels, #0x8000; /* stp */ \ vbif.u16 blend_pixels, pixels, blend_mask; \ + vorr.u16 blend_pixels, blend_pixels, msb_mask; \ vbit.u16 blend_pixels, fb_pixels, draw_mask; \ vst1.u16 { blend_pixels }, [fb_ptr_next]; \ \ + restore_abi_regs(); \ ldmia sp!, { r4, pc }; \ \ 2: \ @@ -4169,6 +4214,7 @@ function(blend_blocks_textured_add_fourth_##mask_evaluate) \ \ function(blend_blocks_untextured_add_fourth_##mask_evaluate) \ stmdb sp!, { r4, r14 }; \ + save_abi_regs(); \ add mask_msb_ptr, psx_gpu, #psx_gpu_mask_msb_offset; \ ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \ \ @@ -4248,6 +4294,7 @@ function(blend_blocks_untextured_add_fourth_##mask_evaluate) \ vbit.u16 blend_pixels, fb_pixels, draw_mask; \ vst1.u16 { blend_pixels }, [fb_ptr_next]; \ \ + restore_abi_regs(); \ ldmia sp!, { r4, pc }; \ \ 2: \ @@ -4273,6 +4320,7 @@ blend_blocks_add_fourth_untextured_builder(on) function(blend_blocks_textured_unblended_on) stmdb sp!, { r4, r14 } + save_abi_regs() add mask_msb_ptr, psx_gpu, #psx_gpu_mask_msb_offset ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset] @@ -4312,6 +4360,7 @@ function(blend_blocks_textured_unblended_on) vbif.u16 fb_pixels, pixels, draw_mask vst1.u16 { fb_pixels }, [fb_ptr] + restore_abi_regs() ldmia sp!, { r4, pc } @@ -4781,6 +4830,7 @@ setup_sprite_update_texture_8bpp_cache: setup_sprite_setup_left_draw_mask_fb_ptr##x4mode(); \ \ setup_sprite_tile_column_height_##multi_height(edge_mode, edge, tm, x4mode); \ + restore_abi_regs(); \ ldmia sp!, { r4 - r11, pc } \ #define setup_sprite_tiled_advance_column() \ @@ -4817,6 +4867,7 @@ setup_sprite_update_texture_8bpp_cache: \ setup_sprite_tiled_advance_column(); \ setup_sprite_tile_column_height_##multi_height(right_mode, left, tm, x4mode);\ + restore_abi_regs(); \ ldmia sp!, { r4 - r11, pc } \ @@ -5175,6 +5226,8 @@ function(setup_sprite_##texture_mode##x4mode) \ ldr height, [sp, #44]; \ add fb_ptr, fb_ptr, y, lsl #11; \ \ + save_abi_regs(); \ + \ add fb_ptr, fb_ptr, x, lsl #1; \ and offset_v, v, #0xF; \ \ @@ -5898,7 +5951,7 @@ setup_sprite_untextured_height_loop: #define texel_block_expanded_b q2 #define texel_block_expanded_ab q2 #define texel_block_expanded_c q3 -#define texel_block_expanded_d q4 +#define texel_block_expanded_d q0 #define texel_block_expanded_cd q3 function(update_texture_4bpp_cache)