drc: merge Ari64's patch: 11_reduce_invstub_memory_usage
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
index 1a4324d..a40acf1 100644 (file)
@@ -66,8 +66,41 @@ const u_int jump_vaddr_reg[16] = {
   0,
   0};
 
+// Per-register invalidate stubs.  They are declared here only; their
+// definitions are not part of this section.
+// NOTE(review): the _rN suffix presumably names the host register that holds
+// the address to invalidate -- confirm against the stub definitions.
+void invalidate_addr_r0();
+void invalidate_addr_r1();
+void invalidate_addr_r2();
+void invalidate_addr_r3();
+void invalidate_addr_r4();
+void invalidate_addr_r5();
+void invalidate_addr_r6();
+void invalidate_addr_r7();
+void invalidate_addr_r8();
+void invalidate_addr_r9();
+void invalidate_addr_r10();
+void invalidate_addr_r12();
+
+// Stub addresses stored as integers, one slot per register number 0-15.
+// Slots 11 and 13-15 hold 0: no stub is declared for those registers.
+const u_int invalidate_addr_reg[16] = {
+  (int)invalidate_addr_r0,
+  (int)invalidate_addr_r1,
+  (int)invalidate_addr_r2,
+  (int)invalidate_addr_r3,
+  (int)invalidate_addr_r4,
+  (int)invalidate_addr_r5,
+  (int)invalidate_addr_r6,
+  (int)invalidate_addr_r7,
+  (int)invalidate_addr_r8,
+  (int)invalidate_addr_r9,
+  (int)invalidate_addr_r10,
+  0,
+  (int)invalidate_addr_r12,
+  0,
+  0,
+  0};
+
 #include "fpu.h"
 
+// Bitmap of pages awaiting a cache flush; read and reset by do_clear_cache().
+// Bit j of word i stands for the 4096-byte page at BASE_ADDR+i*131072+j*4096.
+unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
+
 /* Linker */
 
 void set_jump_target(int addr,u_int target)
@@ -2193,6 +2226,13 @@ void emit_addsr12(int rs1,int rs2,int rt)
   output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
 }
 
+// Emit a "blne" (branch with link, executed only on not-equal) targeting a.
+// The value returned by genjmp(a) is OR-ed into the instruction word as the
+// branch offset field.
+void emit_callne(int a)
+{
+  assem_debug("blne %x\n",a);
+  const u_int branch_field=genjmp(a);
+  // 0x1b000000: condition NE (0x1) in the top nibble, BL opcode (0xb) below it
+  output_w32(0x1b000000|branch_field);
+}
+
 // Used to preload hash table entries
 void emit_prefetch(void *addr)
 {
@@ -2585,9 +2625,7 @@ do_readstub(int n)
   }
   assert(rs>=0);
   if(addr<0) addr=rt;
-  if(addr<0)
-    // assume dummy read, no alloced reg
-    addr=get_reg(i_regmap,-1);
+  if(addr<0&&itype[i]!=C1LS&&itype[i]!=C2LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1);
   assert(addr>=0);
   int ftable=0;
   if(type==LOADB_STUB||type==LOADBU_STUB)
@@ -2665,10 +2703,7 @@ inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, i
   int rs=get_reg(regmap,target);
   int rth=get_reg(regmap,target|64);
   int rt=get_reg(regmap,target);
-  // allow for PCSX dummy reads
-  //assert(rt>=0);
-  if(rs<0)
-    rs=get_reg(regmap,-1);
+  if(rs<0) rs=get_reg(regmap,-1);
   assert(rs>=0);
   int ftable=0;
   if(type==LOADB_STUB||type==LOADBU_STUB)
@@ -3281,57 +3316,60 @@ void loadlr_assemble_arm(int i,struct regstat *i_regs)
     memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
     if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
   }
-  if(tl>=0) {
-    //assert(tl>=0);
-    //assert(rt1[i]);
-    if(!using_tlb) {
-      if(!c) {
-        emit_shlimm(addr,3,temp);
-        if (opcode[i]==0x22||opcode[i]==0x26) {
-          emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
-        }else{
-          emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
-        }
-        emit_cmpimm(addr,RAM_SIZE);
-        jaddr=(int)out;
-        emit_jno(0);
-      }
-      else {
-        if (opcode[i]==0x22||opcode[i]==0x26) {
-          emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
-        }else{
-          emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
-        }
-      }
-    }else{ // using tlb
-      int a;
-      if(c) {
-        a=-1;
-      }else if (opcode[i]==0x22||opcode[i]==0x26) {
-        a=0xFFFFFFFC; // LWL/LWR
+  if(!using_tlb) {
+    if(!c) {
+      #ifdef RAM_OFFSET
+      map=get_reg(i_regs->regmap,ROREG);
+      if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
+      #endif
+      emit_shlimm(addr,3,temp);
+      if (opcode[i]==0x22||opcode[i]==0x26) {
+        emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
       }else{
-        a=0xFFFFFFF8; // LDL/LDR
+        emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
       }
-      map=get_reg(i_regs->regmap,TLREG);
-      assert(map>=0);
-      map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
-      if(c) {
-        if (opcode[i]==0x22||opcode[i]==0x26) {
-          emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
-        }else{
-          emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
-        }
+      emit_cmpimm(addr,RAM_SIZE);
+      jaddr=(int)out;
+      emit_jno(0);
+    }
+    else {
+      if (opcode[i]==0x22||opcode[i]==0x26) {
+        emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
+      }else{
+        emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
       }
-      do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
     }
-    if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
-      if(!c||memtarget) {
-        //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
-        emit_readword_indexed_tlb((int)rdram-0x80000000,temp2,map,temp2);
-        if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
+  }else{ // using tlb
+    int a;
+    if(c) {
+      a=-1;
+    }else if (opcode[i]==0x22||opcode[i]==0x26) {
+      a=0xFFFFFFFC; // LWL/LWR
+    }else{
+      a=0xFFFFFFF8; // LDL/LDR
+    }
+    map=get_reg(i_regs->regmap,TLREG);
+    assert(map>=0);
+    map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
+    if(c) {
+      if (opcode[i]==0x22||opcode[i]==0x26) {
+        emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
+      }else{
+        emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
       }
-      else
-        inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
+    }
+    do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
+  }
+  if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
+    if(!c||memtarget) {
+      //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
+      emit_readword_indexed_tlb(0,temp2,map,temp2);
+      if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
+    }
+    else
+      inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
+    if(rt1[i]) {
+      assert(tl>=0);
       emit_andimm(temp,24,temp);
 #ifdef BIG_ENDIAN_MIPS
       if (opcode[i]==0x26) // LWR
@@ -3348,19 +3386,23 @@ void loadlr_assemble_arm(int i,struct regstat *i_regs)
         emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
       }
       emit_or(temp2,tl,tl);
-      //emit_storereg(rt1[i],tl); // DEBUG
     }
-    if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
-      // FIXME: little endian
-      int temp2h=get_reg(i_regs->regmap,FTEMP|64);
-      if(!c||memtarget) {
-        //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
-        //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
-        emit_readdword_indexed_tlb((int)rdram-0x80000000,temp2,map,temp2h,temp2);
-        if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
-      }
-      else
-        inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
+    //emit_storereg(rt1[i],tl); // DEBUG
+  }
+  if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
+    // FIXME: little endian
+    int temp2h=get_reg(i_regs->regmap,FTEMP|64);
+    if(!c||memtarget) {
+      //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
+      //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
+      emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
+      if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
+    }
+    else
+      inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
+    if(rt1[i]) {
+      assert(th>=0);
+      assert(tl>=0);
       emit_testimm(temp,32);
       emit_andimm(temp,24,temp);
       if (opcode[i]==0x1A) { // LDL
@@ -4782,6 +4824,38 @@ void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint
 #define wb_invalidate wb_invalidate_arm
 */
 
+// Clearing the cache is rather slow on ARM Linux, so mark the areas
+// that need to be cleared, and then only clear these areas once.
+//
+// needs_clear_cache[] is a bitmap: bit j of word i marks the 4096-byte page
+// at BASE_ADDR+i*131072+j*4096.  Each run of consecutive marked pages inside
+// a word is handed to __clear_cache() as a single range, after which the
+// word is reset to 0.
+void do_clear_cache()
+{
+  int i,j;
+  for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
+  {
+    u_int bitmap=needs_clear_cache[i];
+    if(bitmap) {
+      u_int start,end;
+      for(j=0;j<32;j++)
+      {
+        // 1u: shifting a signed 1 into bit 31 is undefined behavior
+        if(bitmap&(1u<<j)) {
+          start=BASE_ADDR+i*131072+j*4096;
+          end=start+4095;
+          j++;
+          // Extend the range over the marked pages that directly follow
+          while(j<32&&(bitmap&(1u<<j))) {
+            end+=4096;
+            j++;
+          }
+          // Flush after the scan rather than on the first clear bit, so a
+          // run that extends through bit 31 is flushed as well.
+          __clear_cache((void *)start,(void *)end);
+        }
+      }
+      needs_clear_cache[i]=0;
+    }
+  }
+}
+
 // CPU-architecture-specific initialization
 void arch_init() {
 #ifndef DISABLE_COP1