X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=plugins%2Fgpu_neon%2Fpsx_gpu%2Fpsx_gpu_arm_neon.S;h=efb065d7e44959c50a759d3d39f8610d78ebfb67;hb=4d6467383217647e3fbc58ab9213a31c0f3bd8c9;hp=8df7acadc66ba55f0d7e89e3a0a204a2d07003b2;hpb=ed0fd81dfdb63fe5941b9010ace353719168d3ae;p=pcsx_rearmed.git diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S index 8df7acad..efb065d7 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S @@ -194,10 +194,43 @@ .align 4 +#ifndef __MACH__ + #define function(name) \ .global name; \ + .type name, %function; \ name: \ +#define JT_OP_REL(table_label, index_reg, temp) +#define JT_OP(x...) x +#define JTE(start, target) target + +#define EXTRA_UNSAVED_REGS + +#else + +#define function(name) \ + .globl _##name; \ + name: \ + _##name: \ + +#define JT_OP_REL(table_label, index_reg, temp) \ + adr temp, table_label; \ + ldr temp, [ temp, index_reg, lsl #2 ]; \ + add pc, pc, temp \ + +#define JT_OP(x...) +#define JTE(start, target) (target - start) + +// r7 is preserved, but add it for EABI alignment.. +#define EXTRA_UNSAVED_REGS r7, r9, + +#define flush_render_block_buffer _flush_render_block_buffer +#define setup_sprite_untextured_simple _setup_sprite_untextured_simple +#define update_texture_8bpp_cache _update_texture_8bpp_cache + +#endif + @ r0: psx_gpu @ r1: v_a @ r2: v_b @@ -1560,9 +1593,9 @@ function(setup_blocks_shaded_textured_dithered_##swizzling##_indirect) \ vpush { texture_mask }; \ vpush { uvrg_dx4 }; \ \ - stmdb sp!, { r0 - r3, r12, r14 }; \ + stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \ bl flush_render_block_buffer; \ - ldmia sp!, { r0 - r3, r12, r14 }; \ + ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \ \ vpop { uvrg_dx4 }; \ vpop { texture_mask }; \ @@ -1757,9 +1790,9 @@ function(setup_blocks_unshaded_textured_dithered_##swizzling##_indirect) \ vpush { texture_mask }; \ vpush { uvrg_dx4 }; \ \ - stmdb sp!, { r0 - r3, r12, r14 }; \ + stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \ bl flush_render_block_buffer; \ - ldmia sp!, { r0 - r3, r12, r14 }; \ + ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \ \ vpop { uvrg_dx4 }; \ vpop { texture_mask }; \ @@ -1873,9 +1906,9 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_indirect) 2: vpush { colors } - stmdb sp!, { r0 - r3, r12, r14 } + stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 } bl flush_render_block_buffer - ldmia sp!, { r0 - r3, r12, r14 } + ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 } vpop { colors } @@ -2288,9 +2321,9 @@ function(setup_blocks_shaded_untextured_##dithering##_unswizzled_indirect) \ /* TODO: Load from psx_gpu instead of saving/restoring these */\ vpush { rg_dx4 }; \ \ - stmdb sp!, { r0 - r3, r12, r14 }; \ + stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \ bl flush_render_block_buffer; \ - ldmia sp!, { r0 - r3, r12, r14 }; \ + ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \ \ vpop { rg_dx4 }; \ \ @@ -2493,17 +2526,19 @@ function(setup_blocks_shaded_untextured_##dithering##_unswizzled_direct) \ vmlal.u8 pixels, g_whole_8, d64_4; \ vmlal.u8 pixels, b_whole_8, d64_128; \ \ - ldr pc, [ pc, right_mask, lsl #2 ]; \ + JT_OP_REL(100f, right_mask, temp); \ + JT_OP(ldr pc, [ pc, right_mask, lsl #2 ]); \ nop; \ + 100: \ nop; \ - .word 4f; \ - .word 5f; \ - .word 6f; \ - .word 7f; \ - .word 8f; \ - .word 9f; \ - .word 10f; \ - .word 11f; \ + .word JTE(100b, 4f); \ + .word JTE(100b, 5f); \ + .word JTE(100b, 6f); \ + .word JTE(100b, 7f); \ + .word JTE(100b, 8f); \ + .word JTE(100b, 9f); \ + .word JTE(100b, 10f); \ + .word JTE(100b, 11f); \ \ 4: \ vst1.u16 { pixels_low[0] }, [ fb_ptr ]; \ @@ -2779,11 +2814,11 @@ function(texture_blocks_8bpp) ldmia sp!, { r3 - r11, pc } 1: - stmdb sp!, { r1 - r2, r12 } + stmdb sp!, { r1 - r2, EXTRA_UNSAVED_REGS r12 } bl update_texture_8bpp_cache - ldmia sp!, { r1 - r2, r12 } + ldmia sp!, { r1 - r2, EXTRA_UNSAVED_REGS r12 } bal 0b @@ -4392,6 +4427,8 @@ function(render_block_fill_body) #define fb_ptr_advance_column r12 #define texture_block_ptr r14 +#define temp r14 + #define texture_page_ptr r3 #define left_block_mask r4 #define right_block_mask r5 @@ -4447,9 +4484,9 @@ function(render_block_fill_body) setup_sprite_flush_blocks: vpush { q1 - q5 } - stmdb sp!, { r0 - r3, r12, r14 } + stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 } bl flush_render_block_buffer - ldmia sp!, { r0 - r3, r12, r14 } + ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 } vpop { q1 - q5 } @@ -4464,9 +4501,9 @@ setup_sprite_update_texture_4bpp_cache: setup_sprite_update_texture_8bpp_cache: - stmdb sp!, { r0 - r3, r14 } + stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r14 } bl update_texture_8bpp_cache - ldmia sp!, { r0 - r3, pc } + ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS pc } #define setup_sprite_tiled_initialize_4bpp() \ @@ -5212,24 +5249,26 @@ function(setup_sprite_##texture_mode##x4mode) \ add block, block, num_blocks, lsl #6; \ \ orreq control_mask, control_mask, #0x2; \ - ldr pc, [ pc, control_mask, lsl #2 ]; \ + JT_OP_REL(9f, control_mask, temp); \ + JT_OP(ldr pc, [ pc, control_mask, lsl #2 ]); \ nop; \ \ - .word setup_sprite_##texture_mode##_multi_multi_full_full##x4mode; \ - .word setup_sprite_##texture_mode##_single_multi_full_none##x4mode; \ - .word setup_sprite_##texture_mode##_multi_single_full_full##x4mode; \ - .word setup_sprite_##texture_mode##_single_single_full_none##x4mode; \ - .word setup_sprite_##texture_mode##_multi_multi_half_full##x4mode; \ - .word setup_sprite_##texture_mode##_single_multi_half_right##x4mode; \ - .word setup_sprite_##texture_mode##_multi_single_half_full##x4mode; \ - .word setup_sprite_##texture_mode##_single_single_half_right##x4mode; \ - .word setup_sprite_##texture_mode##_multi_multi_full_half##x4mode; \ - .word setup_sprite_##texture_mode##_single_multi_half_left##x4mode; \ - .word setup_sprite_##texture_mode##_multi_single_full_half##x4mode; \ - .word setup_sprite_##texture_mode##_single_single_half_left##x4mode; \ - .word setup_sprite_##texture_mode##_multi_multi_half_half##x4mode; \ + 9: \ + .word JTE(9b, setup_sprite_##texture_mode##_multi_multi_full_full##x4mode); \ + .word JTE(9b, setup_sprite_##texture_mode##_single_multi_full_none##x4mode); \ + .word JTE(9b, setup_sprite_##texture_mode##_multi_single_full_full##x4mode); \ + .word JTE(9b, setup_sprite_##texture_mode##_single_single_full_none##x4mode); \ + .word JTE(9b, setup_sprite_##texture_mode##_multi_multi_half_full##x4mode); \ + .word JTE(9b, setup_sprite_##texture_mode##_single_multi_half_right##x4mode); \ + .word JTE(9b, setup_sprite_##texture_mode##_multi_single_half_full##x4mode); \ + .word JTE(9b, setup_sprite_##texture_mode##_single_single_half_right##x4mode);\ + .word JTE(9b, setup_sprite_##texture_mode##_multi_multi_full_half##x4mode); \ + .word JTE(9b, setup_sprite_##texture_mode##_single_multi_half_left##x4mode); \ + .word JTE(9b, setup_sprite_##texture_mode##_multi_single_full_half##x4mode); \ + .word JTE(9b, setup_sprite_##texture_mode##_single_single_half_left##x4mode); \ + .word JTE(9b, setup_sprite_##texture_mode##_multi_multi_half_half##x4mode); \ .word 0x00000000; \ - .word setup_sprite_##texture_mode##_multi_single_half_half##x4mode; \ + .word JTE(9b, setup_sprite_##texture_mode##_multi_single_half_half##x4mode); \ setup_sprite_tiled_builder(4bpp,); @@ -5334,6 +5373,7 @@ function(texture_sprite_blocks_8bpp) #undef texels_wide_high #undef texels_wide #undef fb_ptr2 +#undef temp #define psx_gpu r0 #define x r1 @@ -5387,9 +5427,9 @@ function(texture_sprite_blocks_8bpp) setup_sprites_16bpp_flush: vpush { d0 - d3 } - stmdb sp!, { r0 - r3, r12, r14 } + stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 } bl flush_render_block_buffer - ldmia sp!, { r0 - r3, r12, r14 } + ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 } vpop { d0 - d3 }