drc: merge Ari64's patch: 17_branch_target_liveness_analysis
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / new_dynarec.c
index 66603ea..c6d83c2 100644 (file)
@@ -1593,13 +1593,9 @@ void load_alloc(struct regstat *current,int i)
   //if(rs1[i]!=rt1[i]&&needed_again(rs1[i],i)) clear_const(current,rs1[i]); // Does this help or hurt?
   if(!rs1[i]) current->u&=~1LL; // Allow allocating r0 if it's the source register
   if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
-  if(rt1[i]) {
+  if(rt1[i]&&!((current->u>>rt1[i])&1)) {
     alloc_reg(current,i,rt1[i]);
-    if(get_reg(current->regmap,rt1[i])<0) {
-      // dummy load, but we still need a register to calculate the address
-      alloc_reg_temp(current,i,-1);
-      minimum_free_regs[i]=1;
-    }
+    assert(get_reg(current->regmap,rt1[i])>=0);
     if(opcode[i]==0x27||opcode[i]==0x37) // LWU/LD
     {
       current->is32&=~(1LL<<rt1[i]);
@@ -1627,12 +1623,14 @@ void load_alloc(struct regstat *current,int i)
   }
   else
   {
-    // Load to r0 (dummy load)
+    // Load to r0 or unneeded register (dummy load)
     // but we still need a register to calculate the address
     if(opcode[i]==0x22||opcode[i]==0x26)
     {
       alloc_reg(current,i,FTEMP); // LWL/LWR need another temporary
     }
+    // If using TLB, need a register for pointer to the mapping table
+    if(using_tlb) alloc_reg(current,i,TLREG);
     alloc_reg_temp(current,i,-1);
     minimum_free_regs[i]=1;
     if(opcode[i]==0x1A||opcode[i]==0x1B) // LDL/LDR
@@ -2842,6 +2840,7 @@ void load_assemble(int i,struct regstat *i_regs)
     if (opcode[i]==0x21||opcode[i]==0x25) x=2; // LH/LHU
     map=get_reg(i_regs->regmap,TLREG);
     assert(map>=0);
+    reglist&=~(1<<map);
     map=do_tlb_r(addr,tl,map,x,-1,-1,c,constmap[i][s]+offset);
     do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
   }
@@ -3164,6 +3163,7 @@ void store_assemble(int i,struct regstat *i_regs)
     if (opcode[i]==0x29) x=2; // SH
     map=get_reg(i_regs->regmap,TLREG);
     assert(map>=0);
+    reglist&=~(1<<map);
     map=do_tlb_w(addr,temp,map,x,c,constmap[i][s]+offset);
     do_tlb_w_branch(map,c,constmap[i][s]+offset,&jaddr);
   }
@@ -3354,6 +3354,7 @@ void storelr_assemble(int i,struct regstat *i_regs)
   }else{ // using tlb
     int map=get_reg(i_regs->regmap,TLREG);
     assert(map>=0);
+    reglist&=~(1<<map);
     map=do_tlb_w(c||s<0||offset?temp:s,temp,map,0,c,constmap[i][s]+offset);
     if(!c&&!offset&&s>=0) emit_mov(s,temp);
     do_tlb_w_branch(map,c,constmap[i][s]+offset,&jaddr);
@@ -3604,6 +3605,7 @@ void c1ls_assemble(int i,struct regstat *i_regs)
   {
     map=get_reg(i_regs->regmap,TLREG);
     assert(map>=0);
+    reglist&=~(1<<map);
     if (opcode[i]==0x31||opcode[i]==0x35) { // LWC1/LDC1
       map=do_tlb_r(offset||c||s<0?ar:s,ar,map,0,-1,-1,c,constmap[i][s]+offset);
     }
@@ -4481,7 +4483,7 @@ void load_all_regs(signed char i_regmap[])
         emit_zeroreg(hr);
       }
       else
-      if(i_regmap[hr]>0 && i_regmap[hr]!=CCREG)
+      if(i_regmap[hr]>0 && (i_regmap[hr]&63)<TEMPREG && i_regmap[hr]!=CCREG)
       {
         emit_loadreg(i_regmap[hr],hr);
       }
@@ -4500,7 +4502,7 @@ void load_needed_regs(signed char i_regmap[],signed char next_regmap[])
           emit_zeroreg(hr);
         }
         else
-        if(i_regmap[hr]>0 && i_regmap[hr]!=CCREG)
+        if(i_regmap[hr]>0 && (i_regmap[hr]&63)<TEMPREG && i_regmap[hr]!=CCREG)
         {
           emit_loadreg(i_regmap[hr],hr);
         }
@@ -4520,7 +4522,7 @@ void load_regs_entry(int t)
   }
   // Load 32-bit regs
   for(hr=0;hr<HOST_REGS;hr++) {
-    if(regs[t].regmap_entry[hr]>=0&&regs[t].regmap_entry[hr]<64) {
+    if(regs[t].regmap_entry[hr]>=0&&regs[t].regmap_entry[hr]<TEMPREG) {
       if(regs[t].regmap_entry[hr]==0) {
         emit_zeroreg(hr);
       }
@@ -4532,7 +4534,7 @@ void load_regs_entry(int t)
   }
   // Load 64-bit regs
   for(hr=0;hr<HOST_REGS;hr++) {
-    if(regs[t].regmap_entry[hr]>=64) {
+    if(regs[t].regmap_entry[hr]>=64&&regs[t].regmap_entry[hr]<TEMPREG+64) {
       assert(regs[t].regmap_entry[hr]!=64);
       if((regs[t].was32>>(regs[t].regmap_entry[hr]&63))&1) {
         int lr=get_reg(regs[t].regmap_entry,regs[t].regmap_entry[hr]-64);
@@ -4612,7 +4614,7 @@ void load_regs_bt(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int ad
     }
     // Load 32-bit regs
     for(hr=0;hr<HOST_REGS;hr++) {
-      if(hr!=EXCLUDE_REG&&regs[t].regmap_entry[hr]>=0&&regs[t].regmap_entry[hr]<64) {
+      if(hr!=EXCLUDE_REG&&regs[t].regmap_entry[hr]>=0&&regs[t].regmap_entry[hr]<TEMPREG) {
         #ifdef DESTRUCTIVE_WRITEBACK
         if(i_regmap[hr]!=regs[t].regmap_entry[hr] || ( !((regs[t].dirty>>hr)&1) && ((i_dirty>>hr)&1) && (((i_is32&~unneeded_reg_upper[t])>>i_regmap[hr])&1) ) || (((i_is32&~regs[t].was32&~unneeded_reg_upper[t])>>(i_regmap[hr]&63))&1)) {
         #else
@@ -4630,7 +4632,7 @@ void load_regs_bt(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int ad
     }
     //Load 64-bit regs
     for(hr=0;hr<HOST_REGS;hr++) {
-      if(hr!=EXCLUDE_REG&&regs[t].regmap_entry[hr]>=64) {
+      if(hr!=EXCLUDE_REG&&regs[t].regmap_entry[hr]>=64&&regs[t].regmap_entry[hr]<TEMPREG+64) {
         if(i_regmap[hr]!=regs[t].regmap_entry[hr]) {
           assert(regs[t].regmap_entry[hr]!=64);
           if((i_is32>>(regs[t].regmap_entry[hr]&63))&1) {
@@ -4671,19 +4673,19 @@ int match_bt(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr)
       {
         if(i_regmap[hr]!=regs[t].regmap_entry[hr])
         {
-          if(regs[t].regmap_entry[hr]!=-1)
+          if(regs[t].regmap_entry[hr]>=0&&(regs[t].regmap_entry[hr]|64)<TEMPREG+64)
           {
             return 0;
           }
           else 
           if((i_dirty>>hr)&1)
           {
-            if(i_regmap[hr]<64)
+            if(i_regmap[hr]<TEMPREG)
             {
               if(!((unneeded_reg[t]>>i_regmap[hr])&1))
                 return 0;
             }
-            else
+            else if(i_regmap[hr]>=64&&i_regmap[hr]<TEMPREG+64)
             {
               if(!((unneeded_reg_upper[t]>>(i_regmap[hr]&63))&1))
                 return 0;
@@ -4857,7 +4859,7 @@ void do_cc(int i,signed char i_regmap[],int *adj,int addr,int taken,int invert)
   }
   else
   {
-    emit_cmpimm(HOST_CCREG,-2*(count+2));
+    emit_cmpimm(HOST_CCREG,-CLOCK_DIVIDER*(count+2));
     jaddr=(int)out;
     emit_jns(0);
   }
@@ -7451,6 +7453,10 @@ void clean_registers(int istart,int iend,int wr)
                   will_dirty_i|=will_dirty[(ba[i]-start)>>2]&(1<<r);
                   wont_dirty_i|=wont_dirty[(ba[i]-start)>>2]&(1<<r);
                 }
+                if(branch_regs[i].regmap[r]>=0) {
+                  will_dirty_i|=((unneeded_reg[(ba[i]-start)>>2]>>(branch_regs[i].regmap[r]&63))&1)<<r;
+                  wont_dirty_i|=((unneeded_reg[(ba[i]-start)>>2]>>(branch_regs[i].regmap[r]&63))&1)<<r;
+                }
               }
             }
           //}
@@ -7480,13 +7486,14 @@ void clean_registers(int istart,int iend,int wr)
           //if(ba[i]>start+i*4) { // Disable recursion (for debugging)
             for(r=0;r<HOST_REGS;r++) {
               if(r!=EXCLUDE_REG) {
-                if(branch_regs[i].regmap[r]==regs[(ba[i]-start)>>2].regmap_entry[r]) {
+                signed char target_reg=branch_regs[i].regmap[r];
+                if(target_reg==regs[(ba[i]-start)>>2].regmap_entry[r]) {
                   will_dirty_i&=will_dirty[(ba[i]-start)>>2]&(1<<r);
                   wont_dirty_i|=wont_dirty[(ba[i]-start)>>2]&(1<<r);
                 }
-                else
-                {
-                  will_dirty_i&=~(1<<r);
+                else if(target_reg>=0) {
+                  will_dirty_i&=((unneeded_reg[(ba[i]-start)>>2]>>(target_reg&63))&1)<<r;
+                  wont_dirty_i|=((unneeded_reg[(ba[i]-start)>>2]>>(target_reg&63))&1)<<r;
                 }
                 // Treat delay slot as part of branch too
                 /*if(regs[i+1].regmap[r]==regs[(ba[i]-start)>>2].regmap_entry[r]) {
@@ -7523,7 +7530,7 @@ void clean_registers(int istart,int iend,int wr)
               }
             }
           }
-          // Merge in delay slot
+          // Merge in delay slot (won't dirty)
           for(r=0;r<HOST_REGS;r++) {
             if(r!=EXCLUDE_REG) {
               if((regs[i].regmap[r]&63)==rt1[i]) wont_dirty_i|=1<<r;
@@ -7640,7 +7647,7 @@ void clean_registers(int istart,int iend,int wr)
             regs[i].wasdirty|=will_dirty_i&(1<<r);
           }
         }
-        else if((nr=get_reg(regs[i].regmap,regmap_pre[i][r]))>=0) {
+        else if(regmap_pre[i][r]>=0&&(nr=get_reg(regs[i].regmap,regmap_pre[i][r]))>=0) {
           // Register moved to a different register
           will_dirty_i&=~(1<<r);
           wont_dirty_i&=~(1<<r);
@@ -8575,6 +8582,7 @@ int new_recompile_block(int addr)
       // Does the block continue due to a branch?
       for(j=i-1;j>=0;j--)
       {
+        if(ba[j]==start+i*4) done=j=0; // Branch into delay slot
         if(ba[j]==start+i*4+4) done=j=0;
         if(ba[j]==start+i*4+8) done=j=0;
       }
@@ -10811,8 +10819,13 @@ int new_recompile_block(int addr)
         wb_valid(regmap_pre[i],regs[i].regmap_entry,dirty_pre,regs[i].wasdirty,is32_pre,
               unneeded_reg[i],unneeded_reg_upper[i]);
       }
-      is32_pre=regs[i].is32;
-      dirty_pre=regs[i].dirty;
+      if((itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)&&!likely[i]) {
+        is32_pre=branch_regs[i].is32;
+        dirty_pre=branch_regs[i].dirty;
+      }else{
+        is32_pre=regs[i].is32;
+        dirty_pre=regs[i].dirty;
+      }
       #endif
       // write back
       if(i<2||(itype[i-2]!=UJUMP&&itype[i-2]!=RJUMP&&(source[i-2]>>16)!=0x1000))