drc: ujump DS $ra overwrite fix?
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / new_dynarec.c
index 39204ce..1fe2c0c 100644 (file)
@@ -126,6 +126,7 @@ struct ll_entry
 #else
   static const u_int using_tlb=0;
 #endif
+  static u_int sp_in_mirror;
   u_int stop_after_jal;
   extern u_char restore_candidate[512];
   extern int cycle_count;
@@ -2826,6 +2827,13 @@ void load_assemble(int i,struct regstat *i_regs)
       if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE)
       #endif
       {
+        #ifdef PCSX
+        if(sp_in_mirror&&rs1[i]==29) {
+          emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
+          emit_cmpimm(HOST_TEMPREG,RAM_SIZE);
+        }
+        else
+        #endif
         emit_cmpimm(addr,RAM_SIZE);
         jaddr=(int)out;
         #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
@@ -2865,6 +2873,9 @@ void load_assemble(int i,struct regstat *i_regs)
           else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset);
 #else
           if(!c) a=addr;
+#endif
+#ifdef PCSX
+          if(sp_in_mirror&&rs1[i]==29) a=HOST_TEMPREG;
 #endif
           emit_movsbl_indexed_tlb(x,a,map,tl);
         }
@@ -2890,6 +2901,9 @@ void load_assemble(int i,struct regstat *i_regs)
           else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset);
 #else
           if(!c) a=addr;
+#endif
+#ifdef PCSX
+          if(sp_in_mirror&&rs1[i]==29) a=HOST_TEMPREG;
 #endif
           //#ifdef
           //emit_movswl_indexed_tlb(x,tl,map,tl);
@@ -2915,13 +2929,17 @@ void load_assemble(int i,struct regstat *i_regs)
   if (opcode[i]==0x23) { // LW
     if(!c||memtarget) {
       if(!dummy) {
+        int a=addr;
+#ifdef PCSX
+        if(sp_in_mirror&&rs1[i]==29) a=HOST_TEMPREG;
+#endif
         //emit_readword_indexed((int)rdram-0x80000000,addr,tl);
         #ifdef HOST_IMM_ADDR32
         if(c)
           emit_readword_tlb(constmap[i][s]+offset,map,tl);
         else
         #endif
-        emit_readword_indexed_tlb(0,addr,map,tl);
+        emit_readword_indexed_tlb(0,a,map,tl);
       }
       if(jaddr)
         add_stub(LOADW_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
@@ -2947,6 +2965,9 @@ void load_assemble(int i,struct regstat *i_regs)
           else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset);
 #else
           if(!c) a=addr;
+#endif
+#ifdef PCSX
+          if(sp_in_mirror&&rs1[i]==29) a=HOST_TEMPREG;
 #endif
           emit_movzbl_indexed_tlb(x,a,map,tl);
         }
@@ -2972,6 +2993,9 @@ void load_assemble(int i,struct regstat *i_regs)
           else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset);
 #else
           if(!c) a=addr;
+#endif
+#ifdef PCSX
+          if(sp_in_mirror&&rs1[i]==29) a=HOST_TEMPREG;
 #endif
           //#ifdef
           //emit_movzwl_indexed_tlb(x,tl,map,tl);
@@ -2998,13 +3022,17 @@ void load_assemble(int i,struct regstat *i_regs)
     assert(th>=0);
     if(!c||memtarget) {
       if(!dummy) {
+        int a=addr;
+#ifdef PCSX
+        if(sp_in_mirror&&rs1[i]==29) a=HOST_TEMPREG;
+#endif
         //emit_readword_indexed((int)rdram-0x80000000,addr,tl);
         #ifdef HOST_IMM_ADDR32
         if(c)
           emit_readword_tlb(constmap[i][s]+offset,map,tl);
         else
         #endif
-        emit_readword_indexed_tlb(0,addr,map,tl);
+        emit_readword_indexed_tlb(0,a,map,tl);
       }
       if(jaddr)
         add_stub(LOADW_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
@@ -3017,6 +3045,10 @@ void load_assemble(int i,struct regstat *i_regs)
   if (opcode[i]==0x37) { // LD
     if(!c||memtarget) {
       if(!dummy) {
+        int a=addr;
+#ifdef PCSX
+        if(sp_in_mirror&&rs1[i]==29) a=HOST_TEMPREG;
+#endif
         //gen_tlb_addr_r(tl,map);
         //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,addr,th);
         //emit_readword_indexed((int)rdram-0x7FFFFFFC,addr,tl);
@@ -3025,7 +3057,7 @@ void load_assemble(int i,struct regstat *i_regs)
           emit_readdword_tlb(constmap[i][s]+offset,map,th,tl);
         else
         #endif
-        emit_readdword_indexed_tlb(0,addr,map,th,tl);
+        emit_readdword_indexed_tlb(0,a,map,th,tl);
       }
       if(jaddr)
         add_stub(LOADD_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
@@ -3105,9 +3137,15 @@ void store_assemble(int i,struct regstat *i_regs)
   else addr=s;
   if(!using_tlb) {
     if(!c) {
+      #ifdef PCSX
+      if(sp_in_mirror&&rs1[i]==29) {
+        emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
+        emit_cmpimm(HOST_TEMPREG,RAM_SIZE);
+      }
+      else
+      #endif
       #ifdef R29_HACK
       // Strmnnrmn's speed hack
-      memtarget=1;
       if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE)
       #endif
       emit_cmpimm(addr,RAM_SIZE);
@@ -3115,6 +3153,7 @@ void store_assemble(int i,struct regstat *i_regs)
       if(s==addr) emit_mov(s,temp);
       #endif
       #ifdef R29_HACK
+      memtarget=1;
       if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE)
       #endif
       {
@@ -3146,6 +3185,9 @@ void store_assemble(int i,struct regstat *i_regs)
       else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset);
 #else
       if(!c) a=addr;
+#endif
+#ifdef PCSX
+      if(sp_in_mirror&&rs1[i]==29) a=HOST_TEMPREG;
 #endif
       //gen_tlb_addr_w(temp,map);
       //emit_writebyte_indexed(tl,(int)rdram-0x80000000,temp);
@@ -3161,6 +3203,9 @@ void store_assemble(int i,struct regstat *i_regs)
       else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset);
 #else
       if(!c) a=addr;
+#endif
+#ifdef PCSX
+      if(sp_in_mirror&&rs1[i]==29) a=HOST_TEMPREG;
 #endif
       //#ifdef
       //emit_writehword_indexed_tlb(tl,x,temp,map,temp);
@@ -3174,23 +3219,32 @@ void store_assemble(int i,struct regstat *i_regs)
     type=STOREH_STUB;
   }
   if (opcode[i]==0x2B) { // SW
-    if(!c||memtarget)
+    if(!c||memtarget) {
+      int a=addr;
+#ifdef PCSX
+      if(sp_in_mirror&&rs1[i]==29) a=HOST_TEMPREG;
+#endif
       //emit_writeword_indexed(tl,(int)rdram-0x80000000,addr);
-      emit_writeword_indexed_tlb(tl,0,addr,map,temp);
+      emit_writeword_indexed_tlb(tl,0,a,map,temp);
+    }
     type=STOREW_STUB;
   }
   if (opcode[i]==0x3F) { // SD
     if(!c||memtarget) {
+      int a=addr;
+#ifdef PCSX
+      if(sp_in_mirror&&rs1[i]==29) a=HOST_TEMPREG;
+#endif
       if(rs2[i]) {
         assert(th>=0);
         //emit_writeword_indexed(th,(int)rdram-0x80000000,addr);
         //emit_writeword_indexed(tl,(int)rdram-0x7FFFFFFC,addr);
-        emit_writedword_indexed_tlb(th,tl,0,addr,map,temp);
+        emit_writedword_indexed_tlb(th,tl,0,a,map,temp);
       }else{
         // Store zero
         //emit_writeword_indexed(tl,(int)rdram-0x80000000,temp);
         //emit_writeword_indexed(tl,(int)rdram-0x7FFFFFFC,temp);
-        emit_writedword_indexed_tlb(tl,tl,0,addr,map,temp);
+        emit_writedword_indexed_tlb(tl,tl,0,a,map,temp);
       }
     }
     type=STORED_STUB;
@@ -5111,34 +5165,19 @@ void ujump_assemble(int i,struct regstat *i_regs)
     if(i_regmap[temp]==PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp);
   }
   #endif
-  ds_assemble(i+1,i_regs);
-  uint64_t bc_unneeded=branch_regs[i].u;
-  uint64_t bc_unneeded_upper=branch_regs[i].uu;
-  bc_unneeded|=1|(1LL<<rt1[i]);
-  bc_unneeded_upper|=1|(1LL<<rt1[i]);
-  wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,
-                bc_unneeded,bc_unneeded_upper);
-  load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG);
   if(rt1[i]==31) {
     int rt;
     unsigned int return_address;
-    assert(rt1[i+1]!=31);
-    assert(rt2[i+1]!=31);
     rt=get_reg(branch_regs[i].regmap,31);
     assem_debug("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
     //assert(rt>=0);
     return_address=start+i*4+8;
     if(rt>=0) {
       #ifdef USE_MINI_HT
-      if(internal_branch(branch_regs[i].is32,return_address)) {
-        int temp=rt+1;
-        if(temp==EXCLUDE_REG||temp>=HOST_REGS||
-           branch_regs[i].regmap[temp]>=0)
-        {
-          temp=get_reg(branch_regs[i].regmap,-1);
-        }
+      if(internal_branch(branch_regs[i].is32,return_address)&&rt1[i+1]!=31) {
+        int temp=-1; // note: must be ds-safe
         #ifdef HOST_TEMPREG
-        if(temp<0) temp=HOST_TEMPREG;
+        temp=HOST_TEMPREG;
         #endif
         if(temp>=0) do_miniht_insert(return_address,rt,temp);
         else emit_movimm(return_address,rt);
@@ -5159,6 +5198,14 @@ void ujump_assemble(int i,struct regstat *i_regs)
       }
     }
   }
+  ds_assemble(i+1,i_regs);
+  uint64_t bc_unneeded=branch_regs[i].u;
+  uint64_t bc_unneeded_upper=branch_regs[i].uu;
+  bc_unneeded|=1|(1LL<<rt1[i]);
+  bc_unneeded_upper|=1|(1LL<<rt1[i]);
+  wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,
+                bc_unneeded,bc_unneeded_upper);
+  load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG);
   int cc,adj;
   cc=get_reg(branch_regs[i].regmap,CCREG);
   assert(cc==HOST_CCREG);
@@ -7741,6 +7788,7 @@ void new_dynarec_clear_full()
 #ifndef DISABLE_TLB
   using_tlb=0;
 #endif
+  sp_in_mirror=0;
   for(n=0;n<524288;n++) // 0 .. 0x7FFFFFFF
     memory_map[n]=-1;
   for(n=524288;n<526336;n++) // 0x80000000 .. 0x807FFFFF
@@ -7850,6 +7898,11 @@ int new_recompile_block(int addr)
   start = (u_int)addr&~3;
   //assert(((u_int)addr&1)==0);
 #ifdef PCSX
+  if(!sp_in_mirror&&(signed int)(psxRegs.GPR.n.sp&0xffe00000)>0x80200000&&
+     0x10000<=psxRegs.GPR.n.sp&&(psxRegs.GPR.n.sp&~0xe0e00000)<RAM_SIZE) {
+    printf("SP hack enabled (%08x), @%08x\n", psxRegs.GPR.n.sp, start); // 2nd %08x: address of the block being recompiled
+    sp_in_mirror=1; // latch: the check above is skipped from now on
+  }
   if (Config.HLE && start == 0x80001000) // hlecall
   {
     // XXX: is this enough? Maybe check hleSoftCall?
@@ -8499,13 +8552,6 @@ int new_recompile_block(int addr)
           do_in_intrp=1;
         }
       }
-      // check for link register access in delay slot
-      // TODO: teach the recompiler to handle this
-      int rt1_=rt1[i-1];
-      if(rt1_!=0&&(rs1[i]==rt1_||rs2[i]==rt1_||rt1[i]==rt1_||rt2[i]==rt1_)) {
-        printf("link access in delay slot @%08x (%08x)\n", addr + i*4, addr);
-        do_in_intrp=1;
-      }
       if(do_in_intrp) {
         rs1[i-1]=CCREG;
         rs2[i-1]=rt1[i-1]=rt2[i-1]=0;
@@ -8840,18 +8886,18 @@ int new_recompile_block(int addr)
           clear_const(&current,rt1[i]);
           alloc_cc(&current,i);
           dirty_reg(&current,CCREG);
+          ooo[i]=1;
+          delayslot_alloc(&current,i+1);
           if (rt1[i]==31) {
             alloc_reg(&current,i,31);
             dirty_reg(&current,31);
-            assert(rs1[i+1]!=31&&rs2[i+1]!=31);
-            assert(rt1[i+1]!=rt1[i]);
+            //assert(rs1[i+1]!=31&&rs2[i+1]!=31);
+            //assert(rt1[i+1]!=rt1[i]);
             #ifdef REG_PREFETCH
             alloc_reg(&current,i,PTEMP);
             #endif
             //current.is32|=1LL<<rt1[i];
           }
-          ooo[i]=1;
-          delayslot_alloc(&current,i+1);
           //current.isconst=0; // DEBUG
           ds=1;
           //printf("i=%d, isconst=%x\n",i,current.isconst);
@@ -10777,9 +10823,9 @@ int new_recompile_block(int addr)
       if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
       {
         // Load the delay slot registers if necessary
-        if(rs1[i+1]!=rs1[i]&&rs1[i+1]!=rs2[i])
+        if(rs1[i+1]!=rs1[i]&&rs1[i+1]!=rs2[i]&&(rs1[i+1]!=rt1[i]||rt1[i]==0))
           load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,rs1[i+1],rs1[i+1]);
-        if(rs2[i+1]!=rs1[i+1]&&rs2[i+1]!=rs1[i]&&rs2[i+1]!=rs2[i])
+        if(rs2[i+1]!=rs1[i+1]&&rs2[i+1]!=rs1[i]&&rs2[i+1]!=rs2[i]&&(rs2[i+1]!=rt1[i]||rt1[i]==0))
           load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,rs2[i+1],rs2[i+1]);
         if(itype[i+1]==STORE||itype[i+1]==STORELR||(opcode[i+1]&0x3b)==0x39||(opcode[i+1]&0x3b)==0x3a)
           load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,INVCP,INVCP);