drc/psx_gpu: handle more calling conventions
[pcsx_rearmed.git] / plugins / gpu_neon / psx_gpu / psx_gpu_arm_neon.S
index 8df7aca..efb065d 100644 (file)
 
 .align 4
 
+#ifndef __MACH__ // build is not targeting Mach-O
+// function(name): export name, tag it as a function symbol, then open its label.
 #define function(name)                                                         \
   .global name;                                                                \
+  .type name, %function;                                                       \
   name:                                                                        \
 
+#define JT_OP_REL(table_label, index_reg, temp) // not needed here: expands to nothing
+#define JT_OP(x...) x // pass the wrapped instruction through unchanged
+#define JTE(start, target) target // jump table entry is the target address itself
+// No additional registers are pushed around calls in this configuration.
+#define EXTRA_UNSAVED_REGS
+// Mach-O configuration: underscore-prefixed exports and table-relative jump tables.
+#else
+// function(name): export _name and define both name and _name at the same address.
+#define function(name)                                                         \
+  .globl _##name;                                                              \
+  name:                                                                        \
+  _##name:                                                                     \
+
+#define JT_OP_REL(table_label, index_reg, temp)                                \
+  adr temp, table_label;                 /* temp = address of the table      */\
+  ldr temp, [ temp, index_reg, lsl #2 ]; /* temp = table[index_reg], offset  */\
+  add pc, pc, temp                       /* jump to pc + loaded offset       */\
+
+#define JT_OP(x...) // the wrapped instruction is dropped; JT_OP_REL does the jump instead
+#define JTE(start, target) (target - start) // jump table entry is the offset of target from start
+// JT_OP_REL adds the offset to pc, so the table label must sit 8 bytes past its add (ARM-state pc read).
+// r9 is treated as not preserved across calls; r7 is preserved, but pushing both keeps the added stack size a multiple of 8.
+#define EXTRA_UNSAVED_REGS r7, r9,
+// Map these names to underscore-prefixed symbols (presumably routines defined outside this file).
+#define flush_render_block_buffer _flush_render_block_buffer
+#define setup_sprite_untextured_simple _setup_sprite_untextured_simple
+#define update_texture_8bpp_cache _update_texture_8bpp_cache
+
+#endif // __MACH__
+
 @ r0: psx_gpu
 @ r1: v_a
 @ r2: v_b
@@ -1560,9 +1593,9 @@ function(setup_blocks_shaded_textured_dithered_##swizzling##_indirect)         \
   vpush { texture_mask };                                                      \
   vpush { uvrg_dx4 };                                                          \
                                                                                \
-  stmdb sp!, { r0 - r3, r12, r14 };                                            \
+  stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 };                        \
   bl flush_render_block_buffer;                                                \
-  ldmia sp!, { r0 - r3, r12, r14 };                                            \
+  ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 };                        \
                                                                                \
   vpop { uvrg_dx4 };                                                           \
   vpop { texture_mask };                                                       \
@@ -1757,9 +1790,9 @@ function(setup_blocks_unshaded_textured_dithered_##swizzling##_indirect)       \
   vpush { texture_mask };                                                      \
   vpush { uvrg_dx4 };                                                          \
                                                                                \
-  stmdb sp!, { r0 - r3, r12, r14 };                                            \
+  stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 };                        \
   bl flush_render_block_buffer;                                                \
-  ldmia sp!, { r0 - r3, r12, r14 };                                            \
+  ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 };                        \
                                                                                \
   vpop { uvrg_dx4 };                                                           \
   vpop { texture_mask };                                                       \
@@ -1873,9 +1906,9 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_indirect)
  2:
   vpush { colors }
 
-  stmdb sp!, { r0 - r3, r12, r14 }
+  stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }
   bl flush_render_block_buffer
-  ldmia sp!, { r0 - r3, r12, r14 }
+  ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }
 
   vpop { colors }
 
@@ -2288,9 +2321,9 @@ function(setup_blocks_shaded_untextured_##dithering##_unswizzled_indirect)     \
   /* TODO: Load from psx_gpu instead of saving/restoring these               */\
   vpush { rg_dx4 };                                                            \
                                                                                \
-  stmdb sp!, { r0 - r3, r12, r14 };                                            \
+  stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 };                        \
   bl flush_render_block_buffer;                                                \
-  ldmia sp!, { r0 - r3, r12, r14 };                                            \
+  ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 };                        \
                                                                                \
   vpop { rg_dx4 };                                                             \
                                                                                \
@@ -2493,17 +2526,19 @@ function(setup_blocks_shaded_untextured_##dithering##_unswizzled_direct)       \
   vmlal.u8 pixels, g_whole_8, d64_4;                                           \
   vmlal.u8 pixels, b_whole_8, d64_128;                                         \
                                                                                \
-  ldr pc, [ pc, right_mask, lsl #2 ];                                          \
+  JT_OP_REL(100f, right_mask, temp);                                           \
+  JT_OP(ldr pc, [ pc, right_mask, lsl #2 ]);                                   \
   nop;                                                                         \
+ 100:                                                                          \
   nop;                                                                         \
-  .word 4f;                                                                    \
-  .word 5f;                                                                    \
-  .word 6f;                                                                    \
-  .word 7f;                                                                    \
-  .word 8f;                                                                    \
-  .word 9f;                                                                    \
-  .word 10f;                                                                   \
-  .word 11f;                                                                   \
+  .word JTE(100b, 4f);                                                         \
+  .word JTE(100b, 5f);                                                         \
+  .word JTE(100b, 6f);                                                         \
+  .word JTE(100b, 7f);                                                         \
+  .word JTE(100b, 8f);                                                         \
+  .word JTE(100b, 9f);                                                         \
+  .word JTE(100b, 10f);                                                        \
+  .word JTE(100b, 11f);                                                        \
                                                                                \
  4:                                                                            \
   vst1.u16 { pixels_low[0] }, [ fb_ptr ];                                      \
@@ -2779,11 +2814,11 @@ function(texture_blocks_8bpp)
   ldmia sp!, { r3 - r11, pc }
 
 1:
-  stmdb sp!, { r1 - r2, r12 }
+  stmdb sp!, { r1 - r2, EXTRA_UNSAVED_REGS r12 }
 
   bl update_texture_8bpp_cache
 
-  ldmia sp!, { r1 - r2, r12 }
+  ldmia sp!, { r1 - r2, EXTRA_UNSAVED_REGS r12 }
   bal 0b
 
 
@@ -4392,6 +4427,8 @@ function(render_block_fill_body)
 #define fb_ptr_advance_column                             r12
 #define texture_block_ptr                                 r14
 
+#define temp                                              r14
+
 #define texture_page_ptr                                  r3
 #define left_block_mask                                   r4
 #define right_block_mask                                  r5
@@ -4447,9 +4484,9 @@ function(render_block_fill_body)
 setup_sprite_flush_blocks:
   vpush { q1 - q5 }
 
-  stmdb sp!, { r0 - r3, r12, r14 }
+  stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }
   bl flush_render_block_buffer
-  ldmia sp!, { r0 - r3, r12, r14 }
+  ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }
 
   vpop { q1 - q5 }
 
@@ -4464,9 +4501,9 @@ setup_sprite_update_texture_4bpp_cache:
 
 
 setup_sprite_update_texture_8bpp_cache: @ calls update_texture_8bpp_cache with r0-r3 kept intact
-  stmdb sp!, { r0 - r3, r14 }
+  stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r14 } @ push r0-r3, any extra regs, and the return address
   bl update_texture_8bpp_cache
-  ldmia sp!, { r0 - r3, pc }
+  ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS pc } @ pop them back; the saved r14 lands in pc, which returns
 
 
 #define setup_sprite_tiled_initialize_4bpp()                                   \
@@ -5212,24 +5249,26 @@ function(setup_sprite_##texture_mode##x4mode)                                  \
   add block, block, num_blocks, lsl #6;                                        \
                                                                                \
   orreq control_mask, control_mask, #0x2;                                      \
-  ldr pc, [ pc, control_mask, lsl #2 ];                                        \
+  JT_OP_REL(9f, control_mask, temp);                                           \
+  JT_OP(ldr pc, [ pc, control_mask, lsl #2 ]);                                 \
   nop;                                                                         \
                                                                                \
- .word setup_sprite_##texture_mode##_multi_multi_full_full##x4mode;            \
- .word setup_sprite_##texture_mode##_single_multi_full_none##x4mode;           \
- .word setup_sprite_##texture_mode##_multi_single_full_full##x4mode;           \
- .word setup_sprite_##texture_mode##_single_single_full_none##x4mode;          \
- .word setup_sprite_##texture_mode##_multi_multi_half_full##x4mode;            \
- .word setup_sprite_##texture_mode##_single_multi_half_right##x4mode;          \
- .word setup_sprite_##texture_mode##_multi_single_half_full##x4mode;           \
- .word setup_sprite_##texture_mode##_single_single_half_right##x4mode;         \
- .word setup_sprite_##texture_mode##_multi_multi_full_half##x4mode;            \
- .word setup_sprite_##texture_mode##_single_multi_half_left##x4mode;           \
- .word setup_sprite_##texture_mode##_multi_single_full_half##x4mode;           \
- .word setup_sprite_##texture_mode##_single_single_half_left##x4mode;          \
- .word setup_sprite_##texture_mode##_multi_multi_half_half##x4mode;            \
+ 9:                                                                            \
+ .word JTE(9b, setup_sprite_##texture_mode##_multi_multi_full_full##x4mode);   \
+ .word JTE(9b, setup_sprite_##texture_mode##_single_multi_full_none##x4mode);  \
+ .word JTE(9b, setup_sprite_##texture_mode##_multi_single_full_full##x4mode);  \
+ .word JTE(9b, setup_sprite_##texture_mode##_single_single_full_none##x4mode); \
+ .word JTE(9b, setup_sprite_##texture_mode##_multi_multi_half_full##x4mode);   \
+ .word JTE(9b, setup_sprite_##texture_mode##_single_multi_half_right##x4mode); \
+ .word JTE(9b, setup_sprite_##texture_mode##_multi_single_half_full##x4mode);  \
+ .word JTE(9b, setup_sprite_##texture_mode##_single_single_half_right##x4mode);\
+ .word JTE(9b, setup_sprite_##texture_mode##_multi_multi_full_half##x4mode);   \
+ .word JTE(9b, setup_sprite_##texture_mode##_single_multi_half_left##x4mode);  \
+ .word JTE(9b, setup_sprite_##texture_mode##_multi_single_full_half##x4mode);  \
+ .word JTE(9b, setup_sprite_##texture_mode##_single_single_half_left##x4mode); \
+ .word JTE(9b, setup_sprite_##texture_mode##_multi_multi_half_half##x4mode);   \
  .word 0x00000000;                                                             \
- .word setup_sprite_##texture_mode##_multi_single_half_half##x4mode;           \
+ .word JTE(9b, setup_sprite_##texture_mode##_multi_single_half_half##x4mode);  \
 
 
 setup_sprite_tiled_builder(4bpp,);
@@ -5334,6 +5373,7 @@ function(texture_sprite_blocks_8bpp)
 #undef texels_wide_high
 #undef texels_wide
 #undef fb_ptr2
+#undef temp
 
 #define psx_gpu                                           r0
 #define x                                                 r1
@@ -5387,9 +5427,9 @@ function(texture_sprite_blocks_8bpp)
 setup_sprites_16bpp_flush:
   vpush { d0 - d3 }
 
-  stmdb sp!, { r0 - r3, r12, r14 }
+  stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }
   bl flush_render_block_buffer
-  ldmia sp!, { r0 - r3, r12, r14 }
+  ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }
 
   vpop { d0 - d3 }