drc/psx_gpu: handle more calling conventions
author notaz <notasas@gmail.com>
Tue, 2 Apr 2013 01:03:25 +0000 (04:03 +0300)
committer notaz <notasas@gmail.com>
Tue, 2 Apr 2013 01:03:25 +0000 (04:03 +0300)
libpcsxcore/new_dynarec/assem_arm.c
plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S

index 8fe88fd..45edd65 100644 (file)
 char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
 #endif
 
+#ifndef __MACH__
+#define CALLER_SAVE_REGS 0x100f // host register bitmask (bit n = rn): r0-r3, r12
+#else
+#define CALLER_SAVE_REGS 0x120f // r0-r3, r9, r12; r9 presumably is not preserved across calls under this ABI - TODO confirm
+#endif
+
 extern int cycle_count;
 extern int last_count;
 extern int pcaddr;
@@ -2627,13 +2633,13 @@ static void restore_regs_all(u_int reglist)
 // Save registers before function call
 static void save_regs(u_int reglist)
 {
-  reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
+  reglist&=CALLER_SAVE_REGS; // only save the caller-save registers (r0-r3, r12; also r9 under __MACH__)
   save_regs_all(reglist);
 }
 // Restore registers after function call
 static void restore_regs(u_int reglist)
 {
-  reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
+  reglist&=CALLER_SAVE_REGS; // only restore the caller-save registers (r0-r3, r12; also r9 under __MACH__)
   restore_regs_all(reglist);
 }
 
@@ -4518,7 +4524,7 @@ static void c2op_assemble(int i,struct regstat *i_regs)
   for(hr=0;hr<HOST_REGS;hr++) {
     if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
   }
-  reglist=reglist_full&0x100f;
+  reglist=reglist_full&CALLER_SAVE_REGS;
 
   if (gte_handlers[c2op]!=NULL) {
     need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
@@ -4536,7 +4542,7 @@ static void c2op_assemble(int i,struct regstat *i_regs)
         int v  = (source[i] >> 15) & 3;
         int cv = (source[i] >> 13) & 3;
         int mx = (source[i] >> 17) & 3;
-        reglist=reglist_full&0x10ff; // +{r4-r7}
+        reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
         c2op_prologue(c2op,reglist);
         /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
         if(v<3)
@@ -5414,7 +5420,7 @@ void multdiv_assemble_arm(int i,struct regstat *i_regs)
         assert(m2h>=0);
         assert(m1l>=0);
         assert(m2l>=0);
-        save_regs(0x100f);
+        save_regs(CALLER_SAVE_REGS);
         if(m1l!=0) emit_mov(m1l,0);
         if(m1h==0) emit_readword((int)&dynarec_local,1);
         else if(m1h>1) emit_mov(m1h,1);
@@ -5423,7 +5429,7 @@ void multdiv_assemble_arm(int i,struct regstat *i_regs)
         if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
         else if(m2h>3) emit_mov(m2h,3);
         emit_call((int)&multu64);
-        restore_regs(0x100f);
+        restore_regs(CALLER_SAVE_REGS);
         signed char hih=get_reg(i_regs->regmap,HIREG|64);
         signed char hil=get_reg(i_regs->regmap,HIREG);
         signed char loh=get_reg(i_regs->regmap,LOREG|64);
@@ -5494,7 +5500,7 @@ void multdiv_assemble_arm(int i,struct regstat *i_regs)
         assert(d2h>=0);
         assert(d1l>=0);
         assert(d2l>=0);
-        save_regs(0x100f);
+        save_regs(CALLER_SAVE_REGS);
         if(d1l!=0) emit_mov(d1l,0);
         if(d1h==0) emit_readword((int)&dynarec_local,1);
         else if(d1h>1) emit_mov(d1h,1);
@@ -5503,7 +5509,7 @@ void multdiv_assemble_arm(int i,struct regstat *i_regs)
         if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
         else if(d2h>3) emit_mov(d2h,3);
         emit_call((int)&div64);
-        restore_regs(0x100f);
+        restore_regs(CALLER_SAVE_REGS);
         signed char hih=get_reg(i_regs->regmap,HIREG|64);
         signed char hil=get_reg(i_regs->regmap,HIREG);
         signed char loh=get_reg(i_regs->regmap,LOREG|64);
@@ -5527,7 +5533,7 @@ void multdiv_assemble_arm(int i,struct regstat *i_regs)
         assert(d2h>=0);
         assert(d1l>=0);
         assert(d2l>=0);
-        save_regs(0x100f);
+        save_regs(CALLER_SAVE_REGS);
         if(d1l!=0) emit_mov(d1l,0);
         if(d1h==0) emit_readword((int)&dynarec_local,1);
         else if(d1h>1) emit_mov(d1h,1);
@@ -5536,7 +5542,7 @@ void multdiv_assemble_arm(int i,struct regstat *i_regs)
         if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
         else if(d2h>3) emit_mov(d2h,3);
         emit_call((int)&divu64);
-        restore_regs(0x100f);
+        restore_regs(CALLER_SAVE_REGS);
         signed char hih=get_reg(i_regs->regmap,HIREG|64);
         signed char hil=get_reg(i_regs->regmap,HIREG);
         signed char loh=get_reg(i_regs->regmap,LOREG|64);
index 63252b0..efb065d 100644 (file)
 #define JT_OP(x...) x
 #define JTE(start, target) target
 
+#define EXTRA_UNSAVED_REGS
+
 #else
 
 #define function(name)                                                         \
 #define JT_OP(x...)
 #define JTE(start, target) (target - start)
 
+// r7 is preserved, but add it for EABI alignment..
+#define EXTRA_UNSAVED_REGS r7, r9,
+
 #define flush_render_block_buffer _flush_render_block_buffer
 #define setup_sprite_untextured_simple _setup_sprite_untextured_simple
 #define update_texture_8bpp_cache _update_texture_8bpp_cache
@@ -1588,9 +1593,9 @@ function(setup_blocks_shaded_textured_dithered_##swizzling##_indirect)         \
   vpush { texture_mask };                                                      \
   vpush { uvrg_dx4 };                                                          \
                                                                                \
-  stmdb sp!, { r0 - r3, r12, r14 };                                            \
+  stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 };                        \
   bl flush_render_block_buffer;                                                \
-  ldmia sp!, { r0 - r3, r12, r14 };                                            \
+  ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 };                        \
                                                                                \
   vpop { uvrg_dx4 };                                                           \
   vpop { texture_mask };                                                       \
@@ -1785,9 +1790,9 @@ function(setup_blocks_unshaded_textured_dithered_##swizzling##_indirect)       \
   vpush { texture_mask };                                                      \
   vpush { uvrg_dx4 };                                                          \
                                                                                \
-  stmdb sp!, { r0 - r3, r12, r14 };                                            \
+  stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 };                        \
   bl flush_render_block_buffer;                                                \
-  ldmia sp!, { r0 - r3, r12, r14 };                                            \
+  ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 };                        \
                                                                                \
   vpop { uvrg_dx4 };                                                           \
   vpop { texture_mask };                                                       \
@@ -1901,9 +1906,9 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_indirect)
  2:
   vpush { colors }
 
-  stmdb sp!, { r0 - r3, r12, r14 }
+  stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }
   bl flush_render_block_buffer
-  ldmia sp!, { r0 - r3, r12, r14 }
+  ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }
 
   vpop { colors }
 
@@ -2316,9 +2321,9 @@ function(setup_blocks_shaded_untextured_##dithering##_unswizzled_indirect)     \
   /* TODO: Load from psx_gpu instead of saving/restoring these               */\
   vpush { rg_dx4 };                                                            \
                                                                                \
-  stmdb sp!, { r0 - r3, r12, r14 };                                            \
+  stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 };                        \
   bl flush_render_block_buffer;                                                \
-  ldmia sp!, { r0 - r3, r12, r14 };                                            \
+  ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 };                        \
                                                                                \
   vpop { rg_dx4 };                                                             \
                                                                                \
@@ -2809,11 +2814,11 @@ function(texture_blocks_8bpp)
   ldmia sp!, { r3 - r11, pc }
 
 1:
-  stmdb sp!, { r1 - r2, r12 }
+  stmdb sp!, { r1 - r2, EXTRA_UNSAVED_REGS r12 }
 
   bl update_texture_8bpp_cache
 
-  ldmia sp!, { r1 - r2, r12 }
+  ldmia sp!, { r1 - r2, EXTRA_UNSAVED_REGS r12 }
   bal 0b
 
 
@@ -4479,9 +4484,9 @@ function(render_block_fill_body)
 setup_sprite_flush_blocks:
   vpush { q1 - q5 }
 
-  stmdb sp!, { r0 - r3, r12, r14 }
+  stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }
   bl flush_render_block_buffer
-  ldmia sp!, { r0 - r3, r12, r14 }
+  ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }
 
   vpop { q1 - q5 }
 
@@ -4496,9 +4501,9 @@ setup_sprite_update_texture_4bpp_cache:
 
 
 setup_sprite_update_texture_8bpp_cache:
-  stmdb sp!, { r0 - r3, r14 }
+  stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r14 }
   bl update_texture_8bpp_cache
-  ldmia sp!, { r0 - r3, pc }
+  ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS pc }
 
 
 #define setup_sprite_tiled_initialize_4bpp()                                   \
@@ -5422,9 +5427,9 @@ function(texture_sprite_blocks_8bpp)
 setup_sprites_16bpp_flush:
   vpush { d0 - d3 }
 
-  stmdb sp!, { r0 - r3, r12, r14 }
+  stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }
   bl flush_render_block_buffer
-  ldmia sp!, { r0 - r3, r12, r14 }
+  ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }
 
   vpop { d0 - d3 }