X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=blobdiff_plain;f=libpcsxcore%2Fnew_dynarec%2Fnew_dynarec.c;h=1fe2c0c4114c8f4ff1077623c022e15376c74cbf;hp=9bc0f60a4776b1f0c50b846022ddf8bd600548b8;hb=2adcd6fad4594a18025b4f00c49e43a23f8f8992;hpb=0bbd14543fec5fd4f5664b676771812663235252 diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 9bc0f60a..1fe2c0c4 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -121,7 +121,12 @@ struct ll_entry char shadow[1048576] __attribute__((aligned(16))); void *copy; int expirep; +#ifndef PCSX u_int using_tlb; +#else + static const u_int using_tlb=0; +#endif + static u_int sp_in_mirror; u_int stop_after_jal; extern u_char restore_candidate[512]; extern int cycle_count; @@ -134,19 +139,21 @@ struct ll_entry #define CSREG 35 // Coprocessor status #define CCREG 36 // Cycle count #define INVCP 37 // Pointer to invalid_code -#define TEMPREG 38 -#define FTEMP 38 // FPU/LDL/LDR temporary register -#define PTEMP 39 // Prefetch temporary register -#define TLREG 40 // TLB mapping offset -#define RHASH 41 // Return address hash -#define RHTBL 42 // Return address hash table address -#define RTEMP 43 // JR/JALR address register -#define MAXREG 43 -#define AGEN1 44 // Address generation temporary register -#define AGEN2 45 // Address generation temporary register -#define MGEN1 46 // Maptable address generation temporary register -#define MGEN2 47 // Maptable address generation temporary register -#define BTREG 48 // Branch target temporary register +#define MMREG 38 // Pointer to memory_map +#define ROREG 39 // ram offset (if rdram!=0x80000000) +#define TEMPREG 40 +#define FTEMP 40 // FPU temporary register +#define PTEMP 41 // Prefetch temporary register +#define TLREG 42 // TLB mapping offset +#define RHASH 43 // Return address hash +#define RHTBL 44 // Return address hash table address +#define RTEMP 45 // JR/JALR address 
register +#define MAXREG 45 +#define AGEN1 46 // Address generation temporary register +#define AGEN2 47 // Address generation temporary register +#define MGEN1 48 // Maptable address generation temporary register +#define MGEN2 49 // Maptable address generation temporary register +#define BTREG 50 // Branch target temporary register /* instruction types */ #define NOP 0 // No operation @@ -1180,6 +1187,9 @@ void invalidate_block(u_int block) // Don't trap writes invalid_code[block]=1; +#ifdef PCSX + invalid_code[((u_int)0x80000000>>12)|page]=1; +#endif #ifndef DISABLE_TLB // If there is a valid TLB entry for this page, remove write protect if(tlb_LUT_w[block]) { @@ -2778,8 +2788,10 @@ void load_assemble(int i,struct regstat *i_regs) if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<=0) { c=(i_regs->wasconst>>s)&1; - memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE; - if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1; + if (c) { + memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE; + if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1; + } } //printf("load_assemble: c=%d\n",c); //if(c) printf("load_assemble: const=%x\n",(int)constmap[i][s]+offset); @@ -2815,6 +2827,13 @@ void load_assemble(int i,struct regstat *i_regs) if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE) #endif { + #ifdef PCSX + if(sp_in_mirror&&rs1[i]==29) { + emit_andimm(addr,~0x00e00000,HOST_TEMPREG); + emit_cmpimm(HOST_TEMPREG,RAM_SIZE); + } + else + #endif emit_cmpimm(addr,RAM_SIZE); jaddr=(int)out; #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK @@ -2854,6 +2873,9 @@ void load_assemble(int i,struct regstat *i_regs) else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset); #else if(!c) a=addr; +#endif +#ifdef PCSX + if(sp_in_mirror&&rs1[i]==29) a=HOST_TEMPREG; #endif emit_movsbl_indexed_tlb(x,a,map,tl); } @@ -2879,6 +2901,9 @@ void load_assemble(int 
i,struct regstat *i_regs) else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset); #else if(!c) a=addr; +#endif +#ifdef PCSX + if(sp_in_mirror&&rs1[i]==29) a=HOST_TEMPREG; #endif //#ifdef //emit_movswl_indexed_tlb(x,tl,map,tl); @@ -2904,13 +2929,17 @@ void load_assemble(int i,struct regstat *i_regs) if (opcode[i]==0x23) { // LW if(!c||memtarget) { if(!dummy) { + int a=addr; +#ifdef PCSX + if(sp_in_mirror&&rs1[i]==29) a=HOST_TEMPREG; +#endif //emit_readword_indexed((int)rdram-0x80000000,addr,tl); #ifdef HOST_IMM_ADDR32 if(c) emit_readword_tlb(constmap[i][s]+offset,map,tl); else #endif - emit_readword_indexed_tlb(0,addr,map,tl); + emit_readword_indexed_tlb(0,a,map,tl); } if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist); @@ -2936,6 +2965,9 @@ void load_assemble(int i,struct regstat *i_regs) else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset); #else if(!c) a=addr; +#endif +#ifdef PCSX + if(sp_in_mirror&&rs1[i]==29) a=HOST_TEMPREG; #endif emit_movzbl_indexed_tlb(x,a,map,tl); } @@ -2961,6 +2993,9 @@ void load_assemble(int i,struct regstat *i_regs) else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset); #else if(!c) a=addr; +#endif +#ifdef PCSX + if(sp_in_mirror&&rs1[i]==29) a=HOST_TEMPREG; #endif //#ifdef //emit_movzwl_indexed_tlb(x,tl,map,tl); @@ -2987,13 +3022,17 @@ void load_assemble(int i,struct regstat *i_regs) assert(th>=0); if(!c||memtarget) { if(!dummy) { + int a=addr; +#ifdef PCSX + if(sp_in_mirror&&rs1[i]==29) a=HOST_TEMPREG; +#endif //emit_readword_indexed((int)rdram-0x80000000,addr,tl); #ifdef HOST_IMM_ADDR32 if(c) emit_readword_tlb(constmap[i][s]+offset,map,tl); else #endif - emit_readword_indexed_tlb(0,addr,map,tl); + emit_readword_indexed_tlb(0,a,map,tl); } if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist); @@ -3006,6 +3045,10 @@ void load_assemble(int i,struct regstat *i_regs) if (opcode[i]==0x37) { // LD if(!c||memtarget) { if(!dummy) { + int a=addr; +#ifdef PCSX + 
if(sp_in_mirror&&rs1[i]==29) a=HOST_TEMPREG; +#endif //gen_tlb_addr_r(tl,map); //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,addr,th); //emit_readword_indexed((int)rdram-0x7FFFFFFC,addr,tl); @@ -3014,7 +3057,7 @@ void load_assemble(int i,struct regstat *i_regs) emit_readdword_tlb(constmap[i][s]+offset,map,th,tl); else #endif - emit_readdword_indexed_tlb(0,addr,map,th,tl); + emit_readdword_indexed_tlb(0,a,map,th,tl); } if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist); @@ -3079,8 +3122,10 @@ void store_assemble(int i,struct regstat *i_regs) offset=imm[i]; if(s>=0) { c=(i_regs->wasconst>>s)&1; - memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE; - if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1; + if(c) { + memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE; + if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1; + } } assert(tl>=0); assert(temp>=0); @@ -3092,9 +3137,15 @@ void store_assemble(int i,struct regstat *i_regs) else addr=s; if(!using_tlb) { if(!c) { + #ifdef PCSX + if(sp_in_mirror&&rs1[i]==29) { + emit_andimm(addr,~0x00e00000,HOST_TEMPREG); + emit_cmpimm(HOST_TEMPREG,RAM_SIZE); + } + else + #endif #ifdef R29_HACK // Strmnnrmn's speed hack - memtarget=1; if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE) #endif emit_cmpimm(addr,RAM_SIZE); @@ -3102,6 +3153,7 @@ void store_assemble(int i,struct regstat *i_regs) if(s==addr) emit_mov(s,temp); #endif #ifdef R29_HACK + memtarget=1; if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE) #endif { @@ -3127,59 +3179,72 @@ void store_assemble(int i,struct regstat *i_regs) if (opcode[i]==0x28) { // SB if(!c||memtarget) { - int x=0; + int x=0,a=temp; #ifdef BIG_ENDIAN_MIPS if(!c) emit_xorimm(addr,3,temp); else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset); #else - if(c) x=(constmap[i][s]+offset)-(constmap[i][s]+offset); - 
else if (addr!=temp) emit_mov(addr,temp); + if(!c) a=addr; +#endif +#ifdef PCSX + if(sp_in_mirror&&rs1[i]==29) a=HOST_TEMPREG; #endif //gen_tlb_addr_w(temp,map); //emit_writebyte_indexed(tl,(int)rdram-0x80000000,temp); - emit_writebyte_indexed_tlb(tl,x,temp,map,temp); + emit_writebyte_indexed_tlb(tl,x,a,map,a); } type=STOREB_STUB; } if (opcode[i]==0x29) { // SH if(!c||memtarget) { - int x=0; + int x=0,a=temp; #ifdef BIG_ENDIAN_MIPS if(!c) emit_xorimm(addr,2,temp); else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset); #else - if(c) x=(constmap[i][s]+offset)-(constmap[i][s]+offset); - else if (addr!=temp) emit_mov(addr,temp); + if(!c) a=addr; +#endif +#ifdef PCSX + if(sp_in_mirror&&rs1[i]==29) a=HOST_TEMPREG; #endif //#ifdef //emit_writehword_indexed_tlb(tl,x,temp,map,temp); //#else if(map>=0) { - gen_tlb_addr_w(temp,map); - emit_writehword_indexed(tl,x,temp); + gen_tlb_addr_w(a,map); + emit_writehword_indexed(tl,x,a); }else - emit_writehword_indexed(tl,(int)rdram-0x80000000+x,temp); + emit_writehword_indexed(tl,(int)rdram-0x80000000+x,a); } type=STOREH_STUB; } if (opcode[i]==0x2B) { // SW - if(!c||memtarget) + if(!c||memtarget) { + int a=addr; +#ifdef PCSX + if(sp_in_mirror&&rs1[i]==29) a=HOST_TEMPREG; +#endif //emit_writeword_indexed(tl,(int)rdram-0x80000000,addr); - emit_writeword_indexed_tlb(tl,0,addr,map,temp); + emit_writeword_indexed_tlb(tl,0,a,map,temp); + } type=STOREW_STUB; } if (opcode[i]==0x3F) { // SD if(!c||memtarget) { + int a=addr; +#ifdef PCSX + if(sp_in_mirror&&rs1[i]==29) a=HOST_TEMPREG; +#endif if(rs2[i]) { assert(th>=0); //emit_writeword_indexed(th,(int)rdram-0x80000000,addr); //emit_writeword_indexed(tl,(int)rdram-0x7FFFFFFC,addr); - emit_writedword_indexed_tlb(th,tl,0,addr,map,temp); + emit_writedword_indexed_tlb(th,tl,0,a,map,temp); }else{ // Store zero //emit_writeword_indexed(tl,(int)rdram-0x80000000,temp); //emit_writeword_indexed(tl,(int)rdram-0x7FFFFFFC,temp); - emit_writedword_indexed_tlb(tl,tl,0,addr,map,temp); + 
emit_writedword_indexed_tlb(tl,tl,0,a,map,temp); } } type=STORED_STUB; @@ -3252,7 +3317,7 @@ void storelr_assemble(int i,struct regstat *i_regs) int jaddr=0,jaddr2; int case1,case2,case3; int done0,done1,done2; - int memtarget,c=0; + int memtarget=0,c=0; int agr=AGEN1+(i&1); u_int hr,reglist=0; th=get_reg(i_regs->regmap,rs2[i]|64); @@ -3263,8 +3328,10 @@ void storelr_assemble(int i,struct regstat *i_regs) offset=imm[i]; if(s>=0) { c=(i_regs->isconst>>s)&1; - memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE; - if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1; + if(c) { + memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE; + if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1; + } } assert(tl>=0); for(hr=0;hr>16)^return_address)&0xFFFF],temp); } #endif - ds_assemble(i+1,i_regs); - uint64_t bc_unneeded=branch_regs[i].u; - uint64_t bc_unneeded_upper=branch_regs[i].uu; - bc_unneeded|=1|(1LL<=0); return_address=start+i*4+8; if(rt>=0) { #ifdef USE_MINI_HT - if(internal_branch(branch_regs[i].is32,return_address)) { - int temp=rt+1; - if(temp==EXCLUDE_REG||temp>=HOST_REGS|| - branch_regs[i].regmap[temp]>=0) - { - temp=get_reg(branch_regs[i].regmap,-1); - } + if(internal_branch(branch_regs[i].is32,return_address)&&rt1[i+1]!=31) { + int temp=-1; // note: must be ds-safe #ifdef HOST_TEMPREG - if(temp<0) temp=HOST_TEMPREG; + temp=HOST_TEMPREG; #endif if(temp>=0) do_miniht_insert(return_address,rt,temp); else emit_movimm(return_address,rt); @@ -5146,6 +5198,14 @@ void ujump_assemble(int i,struct regstat *i_regs) } } } + ds_assemble(i+1,i_regs); + uint64_t bc_unneeded=branch_regs[i].u; + uint64_t bc_unneeded_upper=branch_regs[i].uu; + bc_unneeded|=1|(1LL<>2; for(n=526336;n<1048576;n++) // 0x80800000 .. 
0xFFFFFFFF memory_map[n]=-1; + for(n=0;n<4096;n++) ll_clear(jump_in+n); + for(n=0;n<4096;n++) ll_clear(jump_out+n); + for(n=0;n<4096;n++) ll_clear(jump_dirty+n); +} + +void new_dynarec_init() +{ + printf("Init new dynarec\n"); + out=(u_char *)BASE_ADDR; + if (mmap (out, 1<0x80200000&& + 0x10000<=psxRegs.GPR.n.sp&&(psxRegs.GPR.n.sp&~0xe0e00000)>26; - opcode2[i+1]=source[i+1]&0x3f; - if((0>14); else ba[i]=-1; - /* Is this the end of the block? */ - if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000)) { #ifdef PCSX - // check for link register access in delay slot - int rt1_=rt1[i-1]; - if(rt1_!=0&&(rs1[i]==rt1_||rs2[i]==rt1_||rt1[i]==rt1_||rt2[i]==rt1_)) { - printf("link access in delay slot @%08x (%08x)\n", addr + i*4, addr); + if(i>0&&(itype[i-1]==RJUMP||itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP)) { + int do_in_intrp=0; + // branch in delay slot? + if(type==RJUMP||type==UJUMP||type==CJUMP||type==SJUMP||type==FJUMP) { + // don't handle first branch and call interpreter if it's hit + printf("branch in delay slot @%08x (%08x)\n", addr + i*4, addr); + do_in_intrp=1; + } + // basic load delay detection + else if((type==LOAD||type==LOADLR||type==COP0||type==COP2||type==C2LS)&&rt1[i]!=0) { + int t=(ba[i-1]-start)/4; + if(0 <= t && t < i &&(rt1[i]==rs1[t]||rt1[i]==rs2[t])&&itype[t]!=CJUMP&&itype[t]!=SJUMP) { + // jump target wants DS result - potential load delay effect + printf("load delay @%08x (%08x)\n", addr + i*4, addr); + do_in_intrp=1; + bt[t+1]=1; // expected return from interpreter + } + else if(i>=2&&rt1[i-2]==2&&rt1[i]==2&&rs1[i]!=2&&rs2[i]!=2&&rs1[i-1]!=2&&rs2[i-1]!=2&& + !(i>=3&&(itype[i-3]==RJUMP||itype[i-3]==UJUMP||itype[i-3]==CJUMP||itype[i-3]==SJUMP))) { + // v0 overwrite like this is a sign of trouble, bail out + printf("v0 overwrite @%08x (%08x)\n", addr + i*4, addr); + do_in_intrp=1; + } + } + if(do_in_intrp) { + rs1[i-1]=CCREG; + rs2[i-1]=rt1[i-1]=rt2[i-1]=0; ba[i-1]=-1; itype[i-1]=INTCALL; done=2; 
+ i--; // don't compile the DS } - else + } #endif + /* Is this the end of the block? */ + if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000)) { if(rt1[i-1]==0) { // Continue past subroutine call (JAL) done=2; } @@ -8794,18 +8886,18 @@ int new_recompile_block(int addr) clear_const(&current,rt1[i]); alloc_cc(&current,i); dirty_reg(&current,CCREG); + ooo[i]=1; + delayslot_alloc(&current,i+1); if (rt1[i]==31) { alloc_reg(&current,i,31); dirty_reg(&current,31); - assert(rs1[i+1]!=31&&rs2[i+1]!=31); - assert(rt1[i+1]!=rt1[i]); + //assert(rs1[i+1]!=31&&rs2[i+1]!=31); + //assert(rt1[i+1]!=rt1[i]); #ifdef REG_PREFETCH alloc_reg(&current,i,PTEMP); #endif //current.is32|=1LL<>12)) + for(i=start>>12;i<=(start+slen*4)>>12;i++) + invalid_code[((u_int)0x80000000>>12)|i]=0; +#endif /* Pass 10 - Free memory by expiring oldest blocks */