X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=blobdiff_plain;f=libpcsxcore%2Fnew_dynarec%2Fnew_dynarec.c;h=37706bc3fbcee2ae1763dd97c9d1052bccf3be84;hp=716b1d417827e2602fe110acf76812c1d7095a02;hb=cbbd8dd7705d5cb7c748a7ffaf2ccc74893b3910;hpb=bedfea3863c3c48699048ea0d6dd07893221403c diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 716b1d41..37706bc3 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -21,10 +21,15 @@ #include #include //include for uint64_t #include +#include #include "emu_if.h" //emulator interface -#include +//#define DISASM +//#define assem_debug printf +//#define inv_debug printf +#define assem_debug(...) +#define inv_debug(...) #ifdef __i386__ #include "assem_x86.h" @@ -38,7 +43,6 @@ #define MAXBLOCK 4096 #define MAX_OUTPUT_BLOCK_SIZE 262144 -#define CLOCK_DIVIDER 2 struct regstat { @@ -52,7 +56,8 @@ struct regstat uint64_t uu; u_int wasconst; u_int isconst; - uint64_t constmap[HOST_REGS]; + u_int loadedconst; // host regs that have constants loaded + u_int waswritten; // MIPS regs that were used as store base before }; struct ll_entry @@ -83,7 +88,11 @@ struct ll_entry static uint64_t gte_rs[MAXBLOCK]; // gte: 32 data and 32 ctl regs static uint64_t gte_rt[MAXBLOCK]; static uint64_t gte_unneeded[MAXBLOCK]; - static int gte_reads_flags; // gte flag read encountered + static u_int smrv[32]; // speculated MIPS register values + static u_int smrv_strong; // mask or regs that are likely to have correct values + static u_int smrv_weak; // same, but somewhat less likely + static u_int smrv_strong_next; // same, but after current insn executes + static u_int smrv_weak_next; int imm[MAXBLOCK]; u_int ba[MAXBLOCK]; char likely[MAXBLOCK]; @@ -96,11 +105,10 @@ struct ll_entry uint64_t p32[MAXBLOCK]; uint64_t pr32[MAXBLOCK]; signed char regmap_pre[MAXBLOCK][HOST_REGS]; - signed char regmap[MAXBLOCK][HOST_REGS]; - signed char regmap_entry[MAXBLOCK][HOST_REGS]; - uint64_t constmap[MAXBLOCK][HOST_REGS]; - struct regstat regs[MAXBLOCK]; - struct regstat branch_regs[MAXBLOCK]; + static uint64_t current_constmap[HOST_REGS]; + static uint64_t constmap[MAXBLOCK][HOST_REGS]; + static struct regstat regs[MAXBLOCK]; + static struct regstat branch_regs[MAXBLOCK]; signed char minimum_free_regs[MAXBLOCK]; u_int needed_reg[MAXBLOCK]; uint64_t requires_32bit[MAXBLOCK]; @@ -130,7 +138,8 @@ struct ll_entry #else static const u_int using_tlb=0; #endif - static u_int sp_in_mirror; + int new_dynarec_did_compile; + int new_dynarec_hacks; u_int stop_after_jal; extern u_char restore_candidate[512]; extern int cycle_count; @@ -265,11 +274,13 @@ int tracedebug=0; //#define DEBUG_CYCLE_COUNT 1 -void nullf() {} -//#define assem_debug printf -//#define inv_debug printf -#define assem_debug nullf -#define inv_debug nullf +int cycle_multiplier; // 100 for 1.0 + +static int CLOCK_ADJUST(int x) +{ + int s=(x>>31)|1; + return (x * cycle_multiplier + s * 50) / 100; +} static void tlb_hacks() { @@ -335,6 +346,7 @@ static u_int get_page(u_int vaddr) return page; } +#ifndef PCSX static u_int get_vpage(u_int vaddr) { u_int vpage=(vaddr^0x80000000)>>12; @@ -344,6 +356,13 @@ static u_int get_vpage(u_int vaddr) if(vpage>2048) vpage=2048+(vpage&2047); return vpage; } +#else +// no virtual mem in PCSX +static u_int get_vpage(u_int vaddr) +{ + return get_page(vaddr); +} +#endif // Get address from virtual address // This is called from the recompiled JR/JALR instructions @@ -376,7 +395,9 @@ void *get_addr(u_int vaddr) //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]); invalid_code[vaddr>>12]=0; inv_code_start=inv_code_end=~0; +#ifndef DISABLE_TLB memory_map[vaddr>>12]|=0x40000000; +#endif if(vpage<2048) { #ifndef DISABLE_TLB if(tlb_LUT_r[vaddr>>12]) { @@ -582,11 +603,11 @@ void set_const(struct regstat *cur,signed char reg,uint64_t value) for (hr=0;hrregmap[hr]==reg) { cur->isconst|=1<constmap[hr]=value; + current_constmap[hr]=value; } else if((cur->regmap[hr]^64)==reg) { cur->isconst|=1<constmap[hr]=value>>32; + current_constmap[hr]=value>>32; } } } @@ -620,7 +641,7 @@ uint64_t get_const(struct regstat *cur,signed char reg) if(!reg) return 0; for (hr=0;hrregmap[hr]==reg) { - return cur->constmap[hr]; + return current_constmap[hr]; } } printf("Unknown constant in r%d\n",reg); @@ -848,7 +869,7 @@ void alloc_all(struct regstat *cur,int i) } } - +#ifndef FORCE32 void div64(int64_t dividend,int64_t divisor) { lo=dividend/divisor; @@ -959,6 +980,7 @@ uint64_t ldr_merge(uint64_t original,uint64_t loaded,u_int bits) else original=loaded; return original; } +#endif #ifdef __i386__ #include "assem_x86.c" @@ -1215,7 +1237,7 @@ void invalidate_addr(u_int addr) //static int rhits; // this check is done by the caller //if (inv_code_start<=addr&&addr<=inv_code_end) { rhits++; return; } - u_int page=get_page(addr); + u_int page=get_vpage(addr); if(page<2048) { // RAM struct ll_entry *head; u_int addr_min=~0, addr_max=0; @@ -1254,11 +1276,9 @@ void invalidate_addr(u_int addr) return; } else { - inv_debug("INV ADDR: %08x miss, inv %08x-%08x, sk %d\n", addr, inv_code_start, inv_code_end, 0);//rhits); - } - //rhits=0; - if(page!=0) // FIXME: don't know what's up with page 0 (Klonoa) + inv_debug("INV ADDR: %08x miss, inv %08x-%08x, sk %d\n", addr, inv_code_start, inv_code_end, 0); return; + } } #endif invalidate_block(addr>>12); @@ -1333,11 +1353,13 @@ void clean_blocks(u_int page) inv|=invalid_code[i]; } } +#ifndef DISABLE_TLB if((signed int)head->vaddr>=(signed int)0xC0000000) { u_int addr = (head->vaddr+(memory_map[head->vaddr>>12]<<2)); //printf("addr=%x start=%x end=%x\n",addr,start,end); if(addr=end) inv=1; } +#endif else if((signed int)head->vaddr>=(signed int)0x80000000+RAM_SIZE) { inv=1; } @@ -1390,8 +1412,6 @@ void mov_alloc(struct regstat *current,int i) void shiftimm_alloc(struct regstat *current,int i) { - clear_const(current,rs1[i]); - clear_const(current,rt1[i]); if(opcode2[i]<=0x3) // SLL/SRL/SRA { if(rt1[i]) { @@ -1400,8 +1420,21 @@ void shiftimm_alloc(struct regstat *current,int i) alloc_reg(current,i,rt1[i]); current->is32|=1LL<>imm[i]); + if(opcode2[i]==0x03) set_const(current,rt1[i],v>>imm[i]); + } + else clear_const(current,rt1[i]); } } + else + { + clear_const(current,rs1[i]); + clear_const(current,rt1[i]); + } + if(opcode2[i]>=0x38&&opcode2[i]<=0x3b) // DSLL/DSRL/DSRA { if(rt1[i]) { @@ -2687,7 +2720,7 @@ void shiftimm_assemble(int i,struct regstat *i_regs) t=get_reg(i_regs->regmap,rt1[i]); s=get_reg(i_regs->regmap,rs1[i]); //assert(t>=0); - if(t>=0){ + if(t>=0&&!((i_regs->isconst>>t)&1)){ if(rs1[i]==0) { emit_zeroreg(t); @@ -2881,23 +2914,7 @@ void load_assemble(int i,struct regstat *i_regs) if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE) #endif { - #ifdef PCSX - if(sp_in_mirror&&rs1[i]==29) { - emit_andimm(addr,~0x00e00000,HOST_TEMPREG); - emit_cmpimm(HOST_TEMPREG,RAM_SIZE); - fastload_reg_override=HOST_TEMPREG; - } - else - #endif - emit_cmpimm(addr,RAM_SIZE); - jaddr=(int)out; - #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK - // Hint to branch predictor that the branch is unlikely to be taken - if(rs1[i]>=28) - emit_jno_unlikely(0); - else - #endif - emit_jno(0); + jaddr=emit_fastpath_cmp_jump(i,addr,&fastload_reg_override); } } }else{ // using tlb @@ -3182,14 +3199,7 @@ void store_assemble(int i,struct regstat *i_regs) else addr=s; if(!using_tlb) { if(!c) { - #ifdef PCSX - if(sp_in_mirror&&rs1[i]==29) { - emit_andimm(addr,~0x00e00000,HOST_TEMPREG); - emit_cmpimm(HOST_TEMPREG,RAM_SIZE); - faststore_reg_override=HOST_TEMPREG; - } - else - #endif + #ifndef PCSX #ifdef R29_HACK // Strmnnrmn's speed hack if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE) @@ -3212,6 +3222,9 @@ void store_assemble(int i,struct regstat *i_regs) #endif emit_jno(0); } + #else + jaddr=emit_fastpath_cmp_jump(i,addr,&faststore_reg_override); + #endif } }else{ // using tlb int x=0; @@ -3288,7 +3301,15 @@ void store_assemble(int i,struct regstat *i_regs) } type=STORED_STUB; } - if(!using_tlb) { +#ifdef PCSX + if(jaddr) { + // PCSX store handlers don't check invcode again + reglist|=1<waswritten&(1<waswritten&(1<regmap,ROREG); if(map<0) map=HOST_TEMPREG; @@ -3740,7 +3761,7 @@ void c1ls_assemble(int i,struct regstat *i_regs) emit_writedword_indexed_tlb(th,tl,0,offset||c||s<0?temp:s,map,temp); type=STORED_STUB; } - if(!using_tlb) { + if(!using_tlb&&!(i_regs->waswritten&(1<>16)&0x1f; s=get_reg(i_regs->regmap,rs1[i]); @@ -3836,27 +3858,30 @@ void c2ls_assemble(int i,struct regstat *i_regs) } else { if(!c) { - emit_cmpimm(offset||c||s<0?ar:s,RAM_SIZE); - jaddr2=(int)out; - emit_jno(0); + jaddr2=emit_fastpath_cmp_jump(i,ar,&fastio_reg_override); } if (opcode[i]==0x32) { // LWC2 #ifdef HOST_IMM_ADDR32 if(c) emit_readword_tlb(constmap[i][s]+offset,-1,tl); else #endif - emit_readword_indexed(0,ar,tl); + int a=ar; + if(fastio_reg_override) a=fastio_reg_override; + emit_readword_indexed(0,a,tl); } if (opcode[i]==0x3a) { // SWC2 #ifdef DESTRUCTIVE_SHIFT if(!offset&&!c&&s>=0) emit_mov(s,ar); #endif - emit_writeword_indexed(tl,0,ar); + int a=ar; + if(fastio_reg_override) a=fastio_reg_override; + emit_writeword_indexed(tl,0,a); } } if(jaddr2) add_stub(type,jaddr2,(int)out,i,ar,(int)i_regs,ccadj[i],reglist); - if (opcode[i]==0x3a) { // SWC2 + if(opcode[i]==0x3a) // SWC2 + if(!(i_regs->waswritten&(1<regmap,INVCP); assert(ir>=0); @@ -3929,7 +3954,7 @@ void syscall_assemble(int i,struct regstat *i_regs) assert(ccreg==HOST_CCREG); assert(!is_delayslot); emit_movimm(start+i*4,EAX); // Get PC - emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle... + emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle... emit_jmp((int)jump_syscall_hle); // XXX } @@ -3940,7 +3965,7 @@ void hlecall_assemble(int i,struct regstat *i_regs) assert(!is_delayslot); emit_movimm(start+i*4+4,0); // Get PC emit_movimm((int)psxHLEt[source[i]&7],1); - emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // XXX + emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // XXX emit_jmp((int)jump_hlecall); } @@ -3950,12 +3975,13 @@ void intcall_assemble(int i,struct regstat *i_regs) assert(ccreg==HOST_CCREG); assert(!is_delayslot); emit_movimm(start+i*4,0); // Get PC - emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); + emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); emit_jmp((int)jump_intcall); } void ds_assemble(int i,struct regstat *i_regs) { + speculate_register_values(i); is_delayslot=1; switch(itype[i]) { case ALU: @@ -4215,6 +4241,7 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) // printf("poor load scheduling!\n"); } else if(c) { +#ifndef DISABLE_TLB if(rm>=0) { if(!entry||entry[rm]!=mgr) { if(itype[i]==STORE||itype[i]==STORELR||(opcode[i]&0x3b)==0x39||(opcode[i]&0x3b)==0x3a) { @@ -4229,6 +4256,7 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) } } } +#endif if(rs1[i]!=rt1[i]||itype[i]!=LOAD) { if(!entry||entry[ra]!=agr) { if (opcode[i]==0x22||opcode[i]==0x26) { @@ -4241,6 +4269,7 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) (using_tlb&&((signed int)constmap[i][rs]+offset)>=(signed int)0xC0000000)) #endif emit_movimm(constmap[i][rs]+offset,ra); + regs[i].loadedconst|=1<regmap,agr); @@ -4301,6 +4330,7 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) (using_tlb&&((signed int)constmap[i+1][rs]+offset)>=(signed int)0xC0000000)) #endif emit_movimm(constmap[i+1][rs]+offset,ra); + regs[i+1].loadedconst|=1<=0&&((regs[i-1].isconst>>hr)&1)&&pre[hr]==regmap[hr] + &®map[hr]==regs[i-1].regmap[hr]&&((regs[i-1].loadedconst>>hr)&1)) + { + regs[i].loadedconst|=1<=0) { //if(entry[hr]!=regmap[hr]) { - if(i==0||!((regs[i-1].isconst>>hr)&1)||pre[hr]!=regmap[hr]||bt[i]) { + if(!((regs[i].loadedconst>>hr)&1)) { if(((regs[i].isconst>>hr)&1)&®map[hr]<64&®map[hr]>0) { - int value; + int value,similar=0; if(get_final_value(hr,i,&value)) { - if(value==0) { + // see if some other register has similar value + for(hr2=0;hr2>hr2)&1)) { + if(is_similar_value(value,constmap[i][hr2])) { + similar=1; + break; + } + } + } + if(similar) { + int value2; + if(get_final_value(hr2,i,&value2)) // is this needed? + emit_movimm_from(value2,hr2,value,hr); + else + emit_movimm(value,hr); + } + else if(value==0) { emit_zeroreg(hr); } else { emit_movimm(value,hr); } } + regs[i].loadedconst|=1<>2].regmap_entry); @@ -5272,7 +5331,7 @@ void ujump_assemble(int i,struct regstat *i_regs) if(rt1[i]==31&&temp>=0) emit_prefetchreg(temp); #endif do_cc(i,branch_regs[i].regmap,&adj,ba[i],TAKEN,0); - if(adj) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc); + if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); if(internal_branch(branch_regs[i].is32,ba[i])) assem_debug("branch: internal\n"); @@ -5385,8 +5444,14 @@ void rjump_assemble(int i,struct regstat *i_regs) //do_cc(i,branch_regs[i].regmap,&adj,-1,TAKEN); //if(adj) emit_addimm(cc,2*(ccadj[i]+2-adj),cc); // ??? - Shouldn't happen //assert(adj==0); - emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),HOST_CCREG); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); add_stub(CC_STUB,(int)out,jump_vaddr_reg[rs],0,i,-1,TAKEN,0); +#ifdef PCSX + if(itype[i+1]==COP0&&(source[i+1]&0x3f)==0x10) + // special case for RFE + emit_jmp(0); + else +#endif emit_jns(0); //load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,-1); #ifdef USE_MINI_HT @@ -5516,7 +5581,7 @@ void cjump_assemble(int i,struct regstat *i_regs) if(unconditional) { do_cc(i,branch_regs[i].regmap,&adj,ba[i],TAKEN,0); if(i!=(ba[i]-start)>>2 || source[i+1]!=0) { - if(adj) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc); + if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); if(internal) assem_debug("branch: internal\n"); @@ -5535,7 +5600,7 @@ void cjump_assemble(int i,struct regstat *i_regs) } } else if(nop) { - emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),cc); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); int jaddr=(int)out; emit_jns(0); add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0); @@ -5543,7 +5608,7 @@ void cjump_assemble(int i,struct regstat *i_regs) else { int taken=0,nottaken=0,nottaken1=0; do_cc(i,branch_regs[i].regmap,&adj,-1,0,invert); - if(adj&&!invert) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc); + if(adj&&!invert) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); if(!only32) { assert(s1h>=0); @@ -5635,7 +5700,7 @@ void cjump_assemble(int i,struct regstat *i_regs) #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK if(match&&(!internal||!is_ds[(ba[i]-start)>>2])) { if(adj) { - emit_addimm(cc,-CLOCK_DIVIDER*adj,cc); + emit_addimm(cc,-CLOCK_ADJUST(adj),cc); add_to_linker((int)out,ba[i],internal); }else{ emit_addnop(13); @@ -5645,7 +5710,7 @@ void cjump_assemble(int i,struct regstat *i_regs) }else #endif { - if(adj) emit_addimm(cc,-CLOCK_DIVIDER*adj,cc); + if(adj) emit_addimm(cc,-CLOCK_ADJUST(adj),cc); store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); if(internal) @@ -5665,7 +5730,7 @@ void cjump_assemble(int i,struct regstat *i_regs) if(nottaken1) set_jump_target(nottaken1,(int)out); if(adj) { - if(!invert) emit_addimm(cc,CLOCK_DIVIDER*adj,cc); + if(!invert) emit_addimm(cc,CLOCK_ADJUST(adj),cc); } } // (!unconditional) } // if(ooo) @@ -5769,7 +5834,7 @@ void cjump_assemble(int i,struct regstat *i_regs) store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); do_cc(i,i_regmap,&adj,ba[i],TAKEN,0); assem_debug("cycle count (adj)\n"); - if(adj) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc); + if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); if(internal) assem_debug("branch: internal\n"); @@ -5801,7 +5866,7 @@ void cjump_assemble(int i,struct regstat *i_regs) if(cc==-1&&!likely[i]) { // Cycle count isn't in a register, temporarily load it then write it out emit_loadreg(CCREG,HOST_CCREG); - emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),HOST_CCREG); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); int jaddr=(int)out; emit_jns(0); add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0); @@ -5810,7 +5875,7 @@ void cjump_assemble(int i,struct regstat *i_regs) else{ cc=get_reg(i_regmap,CCREG); assert(cc==HOST_CCREG); - emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),cc); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); int jaddr=(int)out; emit_jns(0); add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,likely[i]?NULLDS:NOTTAKEN,0); @@ -5901,7 +5966,7 @@ void sjump_assemble(int i,struct regstat *i_regs) if(unconditional) { do_cc(i,branch_regs[i].regmap,&adj,ba[i],TAKEN,0); if(i!=(ba[i]-start)>>2 || source[i+1]!=0) { - if(adj) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc); + if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); if(internal) assem_debug("branch: internal\n"); @@ -5920,7 +5985,7 @@ void sjump_assemble(int i,struct regstat *i_regs) } } else if(nevertaken) { - emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),cc); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); int jaddr=(int)out; emit_jns(0); add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0); @@ -5928,7 +5993,7 @@ void sjump_assemble(int i,struct regstat *i_regs) else { int nottaken=0; do_cc(i,branch_regs[i].regmap,&adj,-1,0,invert); - if(adj&&!invert) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc); + if(adj&&!invert) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); if(!only32) { assert(s1h>=0); @@ -5986,7 +6051,7 @@ void sjump_assemble(int i,struct regstat *i_regs) #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK if(match&&(!internal||!is_ds[(ba[i]-start)>>2])) { if(adj) { - emit_addimm(cc,-CLOCK_DIVIDER*adj,cc); + emit_addimm(cc,-CLOCK_ADJUST(adj),cc); add_to_linker((int)out,ba[i],internal); }else{ emit_addnop(13); @@ -5996,7 +6061,7 @@ void sjump_assemble(int i,struct regstat *i_regs) }else #endif { - if(adj) emit_addimm(cc,-CLOCK_DIVIDER*adj,cc); + if(adj) emit_addimm(cc,-CLOCK_ADJUST(adj),cc); store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); if(internal) @@ -6015,7 +6080,7 @@ void sjump_assemble(int i,struct regstat *i_regs) } if(adj) { - if(!invert) emit_addimm(cc,CLOCK_DIVIDER*adj,cc); + if(!invert) emit_addimm(cc,CLOCK_ADJUST(adj),cc); } } // (!unconditional) } // if(ooo) @@ -6098,7 +6163,7 @@ void sjump_assemble(int i,struct regstat *i_regs) store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); do_cc(i,i_regmap,&adj,ba[i],TAKEN,0); assem_debug("cycle count (adj)\n"); - if(adj) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc); + if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); if(internal) assem_debug("branch: internal\n"); @@ -6129,7 +6194,7 @@ void sjump_assemble(int i,struct regstat *i_regs) if(cc==-1&&!likely[i]) { // Cycle count isn't in a register, temporarily load it then write it out emit_loadreg(CCREG,HOST_CCREG); - emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),HOST_CCREG); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); int jaddr=(int)out; emit_jns(0); add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0); @@ -6138,7 +6203,7 @@ void sjump_assemble(int i,struct regstat *i_regs) else{ cc=get_reg(i_regmap,CCREG); assert(cc==HOST_CCREG); - emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),cc); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); int jaddr=(int)out; emit_jns(0); add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,likely[i]?NULLDS:NOTTAKEN,0); @@ -6204,7 +6269,7 @@ void fjump_assemble(int i,struct regstat *i_regs) assem_debug("cycle count (adj)\n"); if(1) { int nottaken=0; - if(adj&&!invert) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc); + if(adj&&!invert) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); if(1) { assert(fs>=0); emit_testimm(fs,0x800000); @@ -6231,7 +6296,7 @@ void fjump_assemble(int i,struct regstat *i_regs) } // if(!only32) if(invert) { - if(adj) emit_addimm(cc,-CLOCK_DIVIDER*adj,cc); + if(adj) emit_addimm(cc,-CLOCK_ADJUST(adj),cc); #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK else if(match) emit_addnop(13); #endif @@ -6252,7 +6317,7 @@ void fjump_assemble(int i,struct regstat *i_regs) } if(adj) { - if(!invert) emit_addimm(cc,CLOCK_DIVIDER*adj,cc); + if(!invert) emit_addimm(cc,CLOCK_ADJUST(adj),cc); } } // (!unconditional) } // if(ooo) @@ -6304,7 +6369,7 @@ void fjump_assemble(int i,struct regstat *i_regs) store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); do_cc(i,i_regmap,&adj,ba[i],TAKEN,0); assem_debug("cycle count (adj)\n"); - if(adj) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc); + if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); if(internal) assem_debug("branch: internal\n"); @@ -6334,7 +6399,7 @@ void fjump_assemble(int i,struct regstat *i_regs) if(cc==-1&&!likely[i]) { // Cycle count isn't in a register, temporarily load it then write it out emit_loadreg(CCREG,HOST_CCREG); - emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),HOST_CCREG); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); int jaddr=(int)out; emit_jns(0); add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0); @@ -6343,7 +6408,7 @@ void fjump_assemble(int i,struct regstat *i_regs) else{ cc=get_reg(i_regmap,CCREG); assert(cc==HOST_CCREG); - emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),cc); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); int jaddr=(int)out; emit_jns(0); add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,likely[i]?NULLDS:NOTTAKEN,0); @@ -6416,7 +6481,7 @@ static void pagespan_assemble(int i,struct regstat *i_regs) if((opcode[i]&0x2e)==4||opcode[i]==0x11) { // BEQ/BNE/BEQL/BNEL/BC1 load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,CCREG,CCREG); } - emit_addimm(HOST_CCREG,CLOCK_DIVIDER*(ccadj[i]+2),HOST_CCREG); + emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); if(opcode[i]==2) // J { unconditional=1; @@ -6715,16 +6780,20 @@ void unneeded_registers(int istart,int iend,int r) { int i; uint64_t u,uu,gte_u,b,bu,gte_bu; - uint64_t temp_u,temp_uu,temp_gte_u; + uint64_t temp_u,temp_uu,temp_gte_u=0; uint64_t tdep; + uint64_t gte_u_unknown=0; + if(new_dynarec_hacks&NDHACK_GTE_UNNEEDED) + gte_u_unknown=~0ll; if(iend==slen-1) { u=1;uu=1; + gte_u=gte_u_unknown; }else{ u=unneeded_reg[iend+1]; uu=unneeded_reg_upper[iend+1]; u=1;uu=1; + gte_u=gte_unneeded[iend+1]; } - gte_u=temp_gte_u=0; for (i=iend;i>=istart;i--) { @@ -6739,7 +6808,7 @@ void unneeded_registers(int istart,int iend,int r) // Branch out of this block, flush all regs u=1; uu=1; - gte_u=0; + gte_u=gte_u_unknown; /* Hexagon hack if(itype[i]==UJUMP&&rt1[i]==31) { @@ -6785,7 +6854,7 @@ void unneeded_registers(int istart,int iend,int r) { u=1; uu=1; - gte_u=0; + gte_u=gte_u_unknown; } } } @@ -6828,7 +6897,7 @@ void unneeded_registers(int istart,int iend,int r) { temp_u=1; temp_uu=1; - temp_gte_u=0; + temp_gte_u=gte_u_unknown; } } tdep=(~temp_uu>>rt1[i])&1; @@ -6850,7 +6919,7 @@ void unneeded_registers(int istart,int iend,int r) }else{ unneeded_reg[(ba[i]-start)>>2]=1; unneeded_reg_upper[(ba[i]-start)>>2]=1; - gte_unneeded[(ba[i]-start)>>2]=0; + gte_unneeded[(ba[i]-start)>>2]=gte_u_unknown; } } /*else*/ if(1) { if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000) @@ -6955,6 +7024,8 @@ void unneeded_registers(int istart,int iend,int r) uu&=~(1LL<>2; for(n=526336;n<1048576;n++) // 0x80800000 .. 0xFFFFFFFF memory_map[n]=-1; +#endif for(n=0;n<4096;n++) ll_clear(jump_in+n); for(n=0;n<4096;n++) ll_clear(jump_out+n); for(n=0;n<4096;n++) ll_clear(jump_dirty+n); @@ -7917,6 +7990,7 @@ void new_dynarec_init() fake_pc.f.r.rd=&readmem_dword; #endif int n; + cycle_multiplier=200; new_dynarec_clear_full(); #ifdef HOST_IMM8 // Copy this into local area so we don't have to put it in every literal pool @@ -7999,12 +8073,8 @@ int new_recompile_block(int addr) //rlist(); start = (u_int)addr&~3; //assert(((u_int)addr&1)==0); + new_dynarec_did_compile=1; #ifdef PCSX - if(!sp_in_mirror&&(signed int)(psxRegs.GPR.n.sp&0xffe00000)>0x80200000&& - 0x10000<=psxRegs.GPR.n.sp&&(psxRegs.GPR.n.sp&~0xe0e00000)> 15) & 3; + gte_rs[i]&=~0xe3fll; + if(v==3) gte_rs[i]|=0xe00ll; + else gte_rs[i]|=3ll<<(v*2); + } break; case FLOAT: case FCONV: @@ -8764,6 +8836,7 @@ int new_recompile_block(int addr) dirty_reg(¤t,CCREG); current.isconst=0; current.wasconst=0; + current.waswritten=0; int ds=0; int cc=0; int hr=-1; @@ -8792,6 +8865,7 @@ int new_recompile_block(int addr) if(current.regmap[hr]==0) current.regmap[hr]=-1; } current.isconst=0; + current.waswritten=0; } if(i>1) { @@ -8856,6 +8930,7 @@ int new_recompile_block(int addr) regs[i].wasconst=current.isconst; regs[i].was32=current.is32; regs[i].wasdirty=current.dirty; + regs[i].loadedconst=0; #if defined(DESTRUCTIVE_WRITEBACK) && !defined(FORCE32) // To change a dirty register from 32 to 64 bits, we must write // it out during the previous cycle (for branches, 2 cycles) @@ -9418,6 +9493,14 @@ int new_recompile_block(int addr) } memcpy(regs[i].regmap,current.regmap,sizeof(current.regmap)); } + + if(i>0&&(itype[i-1]==STORE||itype[i-1]==STORELR||(itype[i-1]==C2LS&&opcode[i-1]==0x3a))&&(u_int)imm[i-1]<0x800) + current.waswritten|=1<=0x800) + current.waswritten&=~(1<0) { @@ -9438,7 +9521,7 @@ int new_recompile_block(int addr) branch_regs[i-1].is32|=1LL<<31; } memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap)); - memcpy(constmap[i],constmap[i-1],sizeof(current.constmap)); + memcpy(constmap[i],constmap[i-1],sizeof(current_constmap)); break; case RJUMP: memcpy(&branch_regs[i-1],¤t,sizeof(current)); @@ -9463,7 +9546,7 @@ int new_recompile_block(int addr) } #endif memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap)); - memcpy(constmap[i],constmap[i-1],sizeof(current.constmap)); + memcpy(constmap[i],constmap[i-1],sizeof(current_constmap)); break; case CJUMP: if((opcode[i-1]&0x3E)==4) // BEQ/BNE @@ -9499,7 +9582,7 @@ int new_recompile_block(int addr) branch_regs[i-1].isconst=0; branch_regs[i-1].wasconst=0; memcpy(&branch_regs[i-1].regmap_entry,¤t.regmap,sizeof(current.regmap)); - memcpy(constmap[i],constmap[i-1],sizeof(current.constmap)); + memcpy(constmap[i],constmap[i-1],sizeof(current_constmap)); } else if((opcode[i-1]&0x3E)==6) // BLEZ/BGTZ @@ -9532,7 +9615,7 @@ int new_recompile_block(int addr) branch_regs[i-1].isconst=0; branch_regs[i-1].wasconst=0; memcpy(&branch_regs[i-1].regmap_entry,¤t.regmap,sizeof(current.regmap)); - memcpy(constmap[i],constmap[i-1],sizeof(current.constmap)); + memcpy(constmap[i],constmap[i-1],sizeof(current_constmap)); } else // Alloc the delay slot in case the branch is taken @@ -9598,7 +9681,7 @@ int new_recompile_block(int addr) branch_regs[i-1].isconst=0; branch_regs[i-1].wasconst=0; memcpy(&branch_regs[i-1].regmap_entry,¤t.regmap,sizeof(current.regmap)); - memcpy(constmap[i],constmap[i-1],sizeof(current.constmap)); + memcpy(constmap[i],constmap[i-1],sizeof(current_constmap)); } else // Alloc the delay slot in case the branch is taken @@ -9712,7 +9795,12 @@ int new_recompile_block(int addr) { cc=0; } -#ifdef PCSX +#if defined(PCSX) && !defined(DRC_DBG) + else if(itype[i]==C2OP&>e_cycletab[source[i]&0x3f]>2) + { + // GTE runs in parallel until accessed, divide by 2 for a rough guess + cc+=gte_cycletab[source[i]&0x3f]/2; + } else if(/*itype[i]==LOAD||*/itype[i]==STORE||itype[i]==C1LS) // load causes weird timing issues { cc+=2; // 2 cycle penalty (after CLOCK_DIVIDER) @@ -9732,7 +9820,7 @@ int new_recompile_block(int addr) regs[i].is32=current.is32; regs[i].dirty=current.dirty; regs[i].isconst=current.isconst; - memcpy(constmap[i],current.constmap,sizeof(current.constmap)); + memcpy(constmap[i],current_constmap,sizeof(current_constmap)); } for(hr=0;hr=0) { @@ -9742,6 +9830,7 @@ int new_recompile_block(int addr) } } if(current.regmap[HOST_BTREG]==BTREG) current.regmap[HOST_BTREG]=-1; + regs[i].waswritten=current.waswritten; } /* Pass 4 - Cull unused host registers */ @@ -10886,9 +10975,9 @@ int new_recompile_block(int addr) if(itype[slen-1]==SPAN) { bt[slen-1]=1; // Mark as a branch target so instruction can restart after exception } - + +#ifdef DISASM /* Debug/disassembly */ - if((void*)assem_debug==(void*)printf) for(i=0;i>16)!=0x1000)) { @@ -11276,7 +11367,7 @@ int new_recompile_block(int addr) store_regs_bt(regs[i-1].regmap,regs[i-1].is32,regs[i-1].dirty,start+i*4); if(regs[i-1].regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); - emit_addimm(HOST_CCREG,CLOCK_DIVIDER*(ccadj[i-1]+1),HOST_CCREG); + emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i-1]+1),HOST_CCREG); } else if(!likely[i-2]) { @@ -11299,7 +11390,7 @@ int new_recompile_block(int addr) store_regs_bt(regs[i-1].regmap,regs[i-1].is32,regs[i-1].dirty,start+i*4); if(regs[i-1].regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); - emit_addimm(HOST_CCREG,CLOCK_DIVIDER*(ccadj[i-1]+1),HOST_CCREG); + emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i-1]+1),HOST_CCREG); add_to_linker((int)out,start+i*4,0); emit_jmp(0); } @@ -11456,10 +11547,12 @@ int new_recompile_block(int addr) } inv_code_start=inv_code_end=~0; #ifdef PCSX - // PCSX maps all RAM mirror invalid_code tests to 0x80000000..0x80000000+RAM_SIZE + // for PCSX we need to mark all mirrors too if(get_page(start)<(RAM_SIZE>>12)) for(i=start>>12;i<=(start+slen*4)>>12;i++) - invalid_code[((u_int)0x80000000>>12)|i]=0; + invalid_code[((u_int)0x00000000>>12)|(i&0x1ff)]= + invalid_code[((u_int)0x80000000>>12)|(i&0x1ff)]= + invalid_code[((u_int)0xa0000000>>12)|(i&0x1ff)]=0; #endif /* Pass 10 - Free memory by expiring oldest blocks */