drc: don't clear ARM caches on whole translation cache - it's very slow
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / new_dynarec.c
index 62250d9..e4e6a9a 100644 (file)
@@ -1076,25 +1076,37 @@ void ll_clear(struct ll_entry **head)
 // Dereference the pointers and remove if it matches
 void ll_kill_pointers(struct ll_entry *head,int addr,int shift)
 {
+  u_int old_host_addr=0;
   while(head) {
     int ptr=get_pointer(head->addr);
     inv_debug("EXP: Lookup pointer to %x at %x (%x)\n",(int)ptr,(int)head->addr,head->vaddr);
     if(((ptr>>shift)==(addr>>shift)) ||
        (((ptr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift)))
     {
-      inv_debug("EXP: Kill pointer at %x (%x)\n",(int)head->addr,head->vaddr);
-      kill_pointer(head->addr);
+      printf("EXP: Kill pointer at %x (%x)\n",(int)head->addr,head->vaddr);
+      u_int host_addr=(u_int)kill_pointer(head->addr);
+
+      if((host_addr>>12)!=(old_host_addr>>12)) {
+        #ifdef __arm__
+        __clear_cache((void *)(old_host_addr&~0xfff),(void *)(old_host_addr|0xfff));
+        #endif
+        old_host_addr=host_addr;
+      }
     }
     head=head->next;
   }
+  #ifdef __arm__
+  if (old_host_addr)
+    __clear_cache((void *)(old_host_addr&~0xfff),(void *)(old_host_addr|0xfff));
+  #endif
 }
 
 // This is called when we write to a compiled block (see do_invstub)
-int invalidate_page(u_int page)
+void invalidate_page(u_int page)
 {
-  int modified=0;
   struct ll_entry *head;
   struct ll_entry *next;
+  u_int old_host_addr=0;
   head=jump_in[page];
   jump_in[page]=0;
   while(head!=NULL) {
@@ -1108,17 +1120,25 @@ int invalidate_page(u_int page)
   jump_out[page]=0;
   while(head!=NULL) {
     inv_debug("INVALIDATE: kill pointer to %x (%x)\n",head->vaddr,(int)head->addr);
-    kill_pointer(head->addr);
-    modified=1;
+    u_int host_addr=(u_int)kill_pointer(head->addr);
+
+    if((host_addr>>12)!=(old_host_addr>>12)) {
+      #ifdef __arm__
+      __clear_cache((void *)(old_host_addr&~0xfff),(void *)(old_host_addr|0xfff));
+      #endif
+      old_host_addr=host_addr;
+    }
     next=head->next;
     free(head);
     head=next;
   }
-  return modified;
+  #ifdef __arm__
+  if (old_host_addr)
+    __clear_cache((void *)(old_host_addr&~0xfff),(void *)(old_host_addr|0xfff));
+  #endif
 }
 void invalidate_block(u_int block)
 {
-  int modified;
   u_int page=get_page(block<<12);
   u_int vpage=get_vpage(block<<12);
   inv_debug("INVALIDATE: %x (%d)\n",block<<12,page);
@@ -1151,7 +1171,7 @@ void invalidate_block(u_int block)
     head=head->next;
   }
   //printf("first=%d last=%d\n",first,last);
-  modified=invalidate_page(page);
+  invalidate_page(page);
   assert(first+5>page); // NB: this assumes MAXBLOCK<=4096 (4 pages)
   assert(last<page+5);
   // Invalidate the adjacent pages if a block crosses a 4K boundary
@@ -1177,10 +1197,7 @@ void invalidate_block(u_int block)
   }
   else if(block>=0x80000&&block<0x80800) memory_map[block]=((u_int)rdram-0x80000000)>>2;
 #endif
-  #ifdef __arm__
-  if(modified)
-    __clear_cache((void *)BASE_ADDR,(void *)BASE_ADDR+(1<<TARGET_SIZE_2));
-  #endif
+
   #ifdef USE_MINI_HT
   memset(mini_ht,-1,sizeof(mini_ht));
   #endif
@@ -1908,9 +1925,9 @@ static void pagespan_alloc(struct regstat *current,int i)
   if(opcode[i]==0&&(opcode2[i]&0x3E)==8) // JR/JALR
   {
     alloc_reg(current,i,rs1[i]);
-    if (rt1[i]==31) {
-      alloc_reg(current,i,31);
-      dirty_reg(current,31);
+    if (rt1[i]!=0) {
+      alloc_reg(current,i,rt1[i]);
+      dirty_reg(current,rt1[i]);
     }
   }
   if((opcode[i]&0x2E)==4) // BEQ/BNE/BEQL/BNEL
@@ -2717,7 +2734,7 @@ void load_assemble(int i,struct regstat *i_regs)
   int s,th,tl,addr,map=-1;
   int offset;
   int jaddr=0;
-  int memtarget,c=0;
+  int memtarget=0,c=0;
   u_int hr,reglist=0;
   th=get_reg(i_regs->regmap,rt1[i]|64);
   tl=get_reg(i_regs->regmap,rt1[i]);
@@ -2732,11 +2749,21 @@ void load_assemble(int i,struct regstat *i_regs)
     memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80800000;
     if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
   }
-  if(offset||s<0||c) addr=tl;
-  else addr=s;
   //printf("load_assemble: c=%d\n",c);
   //if(c) printf("load_assemble: const=%x\n",(int)constmap[i][s]+offset);
   // FIXME: Even if the load is a NOP, we should check for pagefaults...
+#ifdef PCSX
+  if(tl<0) {
+    if(!c||(((u_int)constmap[i][s]+offset)>>16)==0x1f80) {
+      // could be FIFO, must perform the read
+      assem_debug("(forced read)\n");
+      tl=get_reg(i_regs->regmap,-1);
+      assert(tl>=0);
+    }
+  }
+  if(offset||s<0||c) addr=tl;
+  else addr=s;
+#endif
   if(tl>=0) {
     //assert(tl>=0);
     //assert(rt1[i]);
@@ -3708,9 +3735,9 @@ void hlecall_assemble(int i,struct regstat *i_regs)
   assert(ccreg==HOST_CCREG);
   assert(!is_delayslot);
   emit_movimm(start+i*4+4,0); // Get PC
-  emit_movimm(source[i],1); // opcode
+  emit_movimm((int)psxHLEt[source[i]&7],1);
   emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // XXX
-  emit_jmp((int)jump_hlecall); // XXX
+  emit_jmp((int)jump_hlecall);
 }
 
 void ds_assemble(int i,struct regstat *i_regs)
@@ -5076,11 +5103,11 @@ void rjump_assemble(int i,struct regstat *i_regs)
   wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,
                 bc_unneeded,bc_unneeded_upper);
   load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i],CCREG);
-  if(rt1[i]==31) {
+  if(rt1[i]!=0) {
     int rt,return_address;
-    assert(rt1[i+1]!=31);
-    assert(rt2[i+1]!=31);
-    rt=get_reg(branch_regs[i].regmap,31);
+    assert(rt1[i+1]!=rt1[i]);
+    assert(rt2[i+1]!=rt1[i]);
+    rt=get_reg(branch_regs[i].regmap,rt1[i]);
     assem_debug("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
     assert(rt>=0);
     return_address=start+i*4+8;
@@ -6196,7 +6223,7 @@ static void pagespan_assemble(int i,struct regstat *i_regs)
     emit_mov(s1l,addr);
     if(opcode2[i]==9) // JALR
     {
-      int rt=get_reg(i_regs->regmap,31);
+      int rt=get_reg(i_regs->regmap,rt1[i]);
       emit_movimm(start+i*4+8,rt);
     }
   }
@@ -6697,9 +6724,6 @@ void unneeded_registers(int istart,int iend,int r)
     // Save it
     unneeded_reg[i]=u;
     unneeded_reg_upper[i]=uu;
-#ifdef FORCE32
-    unneeded_reg_upper[i]=-1LL;
-#endif
     /*
     printf("ur (%d,%d) %x: ",istart,iend,start+i*4);
     printf("U:");
@@ -6721,6 +6745,12 @@ void unneeded_registers(int istart,int iend,int r)
     }
     printf("\n");*/
   }
+#ifdef FORCE32
+  for (i=iend;i>=istart;i--)
+  {
+    unneeded_reg_upper[i]=branch_unneeded_reg_upper[i]=-1LL;
+  }
+#endif
 }
 
 // Identify registers which are likely to contain 32-bit values
@@ -7518,7 +7548,11 @@ void disassemble_inst(int i)
       case FJUMP:
         printf (" %x: %s %8x\n",start+i*4,insn[i],ba[i]);break;
       case RJUMP:
-        printf (" %x: %s r%d\n",start+i*4,insn[i],rs1[i]);break;
+        if (rt1[i]!=31)
+          printf (" %x: %s r%d,r%d\n",start+i*4,insn[i],rt1[i],rs1[i]);
+        else
+          printf (" %x: %s r%d\n",start+i*4,insn[i],rs1[i]);
+        break;
       case SPAN:
         printf (" %x: %s (pagespan) r%d,r%d,%8x\n",start+i*4,insn[i],rs1[i],rs2[i],ba[i]);break;
       case IMM16:
@@ -7704,12 +7738,16 @@ int new_recompile_block(int addr)
 #ifdef PCSX
   if (Config.HLE && start == 0x80001000) {
     // XXX: is this enough? Maybe check hleSoftCall?
+    u_int beginning=(u_int)out;
     u_int page=get_page(start);
     ll_add(jump_in+page,start,out);
     invalid_code[start>>12]=0;
     emit_movimm(start,0);
     emit_writeword(0,(int)&pcaddr);
-    emit_jmp((int)new_dyna_leave); // enough??
+    emit_jmp((int)new_dyna_leave);
+#ifdef __arm__
+    __clear_cache((void *)beginning,out);
+#endif
     return 0;
   }
   else if ((u_int)addr < 0x00200000) {
@@ -8134,9 +8172,9 @@ int new_recompile_block(int addr)
         rs2[i]=0;
         rt1[i]=0;
         rt2[i]=0;
-        // The JALR instruction writes to r31.
+        // The JALR instruction writes to rd.
         if (op2&1) {
-          rt1[i]=31;   
+          rt1[i]=(source[i]>>11)&0x1f;
         }
         rs2[i]=CCREG;
         break;
@@ -8300,7 +8338,7 @@ int new_recompile_block(int addr)
     else ba[i]=-1;
     /* Is this the end of the block? */
     if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000)) {
-      if(rt1[i-1]!=31) { // Continue past subroutine call (JAL)
+      if(rt1[i-1]==0) { // Continue past subroutine call (JAL)
         done=1;
         // Does the block continue due to a branch?
         for(j=i-1;j>=0;j--)
@@ -8643,9 +8681,9 @@ int new_recompile_block(int addr)
           dirty_reg(&current,CCREG);
           if(rs1[i]!=rt1[i+1]&&rs1[i]!=rt2[i+1]) {
             alloc_reg(&current,i,rs1[i]);
-            if (rt1[i]==31) {
-              alloc_reg(&current,i,31);
-              dirty_reg(&current,31);
+            if (rt1[i]!=0) {
+              alloc_reg(&current,i,rt1[i]);
+              dirty_reg(&current,rt1[i]);
               assert(rs1[i+1]!=31&&rs2[i+1]!=31);
               #ifdef REG_PREFETCH
               alloc_reg(&current,i,PTEMP);
@@ -9033,10 +9071,10 @@ int new_recompile_block(int addr)
           alloc_cc(&branch_regs[i-1],i-1);
           dirty_reg(&branch_regs[i-1],CCREG);
           alloc_reg(&branch_regs[i-1],i-1,rs1[i-1]);
-          if(rt1[i-1]==31) { // JALR
-            alloc_reg(&branch_regs[i-1],i-1,31);
-            dirty_reg(&branch_regs[i-1],31);
-            branch_regs[i-1].is32|=1LL<<31;
+          if(rt1[i-1]!=0) { // JALR
+            alloc_reg(&branch_regs[i-1],i-1,rt1[i-1]);
+            dirty_reg(&branch_regs[i-1],rt1[i-1]);
+            branch_regs[i-1].is32|=1LL<<rt1[i-1];
           }
           #ifdef USE_MINI_HT
           if(rs1[i-1]==31) { // JALR
@@ -10791,10 +10829,6 @@ int new_recompile_block(int addr)
         break;
       case 3:
         // Clear jump_out
-        #ifdef __arm__
-        if((expirep&2047)==0)
-          __clear_cache((void *)BASE_ADDR,(void *)BASE_ADDR+(1<<TARGET_SIZE_2));
-        #endif
         ll_remove_matching_addrs(jump_out+(expirep&2047),base,shift);
         ll_remove_matching_addrs(jump_out+2048+(expirep&2047),base,shift);
         break;