X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=blobdiff_plain;f=libpcsxcore%2Fnew_dynarec%2Fnew_dynarec.c;h=37706bc3fbcee2ae1763dd97c9d1052bccf3be84;hp=a177d881b474f7edc961c42d64e64662a088eac7;hb=cbbd8dd7705d5cb7c748a7ffaf2ccc74893b3910;hpb=eba830cd5dc35bc900ea60d8c06ee582178280a4 diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index a177d881..37706bc3 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -21,10 +21,15 @@ #include #include //include for uint64_t #include +#include #include "emu_if.h" //emulator interface -#include +//#define DISASM +//#define assem_debug printf +//#define inv_debug printf +#define assem_debug(...) +#define inv_debug(...) #ifdef __i386__ #include "assem_x86.h" @@ -38,7 +43,6 @@ #define MAXBLOCK 4096 #define MAX_OUTPUT_BLOCK_SIZE 262144 -#define CLOCK_DIVIDER 2 struct regstat { @@ -52,7 +56,8 @@ struct regstat uint64_t uu; u_int wasconst; u_int isconst; - uint64_t constmap[HOST_REGS]; + u_int loadedconst; // host regs that have constants loaded + u_int waswritten; // MIPS regs that were used as store base before }; struct ll_entry @@ -80,6 +85,14 @@ struct ll_entry u_char dep1[MAXBLOCK]; u_char dep2[MAXBLOCK]; u_char lt1[MAXBLOCK]; + static uint64_t gte_rs[MAXBLOCK]; // gte: 32 data and 32 ctl regs + static uint64_t gte_rt[MAXBLOCK]; + static uint64_t gte_unneeded[MAXBLOCK]; + static u_int smrv[32]; // speculated MIPS register values + static u_int smrv_strong; // mask or regs that are likely to have correct values + static u_int smrv_weak; // same, but somewhat less likely + static u_int smrv_strong_next; // same, but after current insn executes + static u_int smrv_weak_next; int imm[MAXBLOCK]; u_int ba[MAXBLOCK]; char likely[MAXBLOCK]; @@ -92,11 +105,10 @@ struct ll_entry uint64_t p32[MAXBLOCK]; uint64_t pr32[MAXBLOCK]; signed char regmap_pre[MAXBLOCK][HOST_REGS]; - signed char regmap[MAXBLOCK][HOST_REGS]; - signed char regmap_entry[MAXBLOCK][HOST_REGS]; - uint64_t constmap[MAXBLOCK][HOST_REGS]; - struct regstat regs[MAXBLOCK]; - struct regstat branch_regs[MAXBLOCK]; + static uint64_t current_constmap[HOST_REGS]; + static uint64_t constmap[MAXBLOCK][HOST_REGS]; + static struct regstat regs[MAXBLOCK]; + static struct regstat branch_regs[MAXBLOCK]; signed char minimum_free_regs[MAXBLOCK]; u_int needed_reg[MAXBLOCK]; uint64_t requires_32bit[MAXBLOCK]; @@ -126,7 +138,8 @@ struct ll_entry #else static const u_int using_tlb=0; #endif - static u_int sp_in_mirror; + int new_dynarec_did_compile; + int new_dynarec_hacks; u_int stop_after_jal; extern u_char restore_candidate[512]; extern int cycle_count; @@ -261,11 +274,13 @@ int tracedebug=0; //#define DEBUG_CYCLE_COUNT 1 -void nullf() {} -//#define assem_debug printf -//#define inv_debug printf -#define assem_debug nullf -#define inv_debug nullf +int cycle_multiplier; // 100 for 1.0 + +static int CLOCK_ADJUST(int x) +{ + int s=(x>>31)|1; + return (x * cycle_multiplier + s * 50) / 100; +} static void tlb_hacks() { @@ -331,6 +346,7 @@ static u_int get_page(u_int vaddr) return page; } +#ifndef PCSX static u_int get_vpage(u_int vaddr) { u_int vpage=(vaddr^0x80000000)>>12; @@ -340,6 +356,13 @@ static u_int get_vpage(u_int vaddr) if(vpage>2048) vpage=2048+(vpage&2047); return vpage; } +#else +// no virtual mem in PCSX +static u_int get_vpage(u_int vaddr) +{ + return get_page(vaddr); +} +#endif // Get address from virtual address // This is called from the recompiled JR/JALR instructions @@ -371,7 +394,10 @@ void *get_addr(u_int vaddr) if(verify_dirty(head->addr)) { //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]); invalid_code[vaddr>>12]=0; + inv_code_start=inv_code_end=~0; +#ifndef DISABLE_TLB memory_map[vaddr>>12]|=0x40000000; +#endif if(vpage<2048) { #ifndef DISABLE_TLB if(tlb_LUT_r[vaddr>>12]) { @@ -463,6 +489,7 @@ void *get_addr_32(u_int vaddr,u_int flags) if(verify_dirty(head->addr)) { //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]); invalid_code[vaddr>>12]=0; + inv_code_start=inv_code_end=~0; memory_map[vaddr>>12]|=0x40000000; if(vpage<2048) { #ifndef DISABLE_TLB @@ -576,11 +603,11 @@ void set_const(struct regstat *cur,signed char reg,uint64_t value) for (hr=0;hrregmap[hr]==reg) { cur->isconst|=1<constmap[hr]=value; + current_constmap[hr]=value; } else if((cur->regmap[hr]^64)==reg) { cur->isconst|=1<constmap[hr]=value>>32; + current_constmap[hr]=value>>32; } } } @@ -614,7 +641,7 @@ uint64_t get_const(struct regstat *cur,signed char reg) if(!reg) return 0; for (hr=0;hrregmap[hr]==reg) { - return cur->constmap[hr]; + return current_constmap[hr]; } } printf("Unknown constant in r%d\n",reg); @@ -842,7 +869,7 @@ void alloc_all(struct regstat *cur,int i) } } - +#ifndef FORCE32 void div64(int64_t dividend,int64_t divisor) { lo=dividend/divisor; @@ -953,6 +980,7 @@ uint64_t ldr_merge(uint64_t original,uint64_t loaded,u_int bits) else original=loaded; return original; } +#endif #ifdef __i386__ #include "assem_x86.c" @@ -1127,39 +1155,10 @@ void invalidate_page(u_int page) head=next; } } -void invalidate_block(u_int block) + +static void invalidate_block_range(u_int block, u_int first, u_int last) { u_int page=get_page(block<<12); - u_int vpage=get_vpage(block<<12); - inv_debug("INVALIDATE: %x (%d)\n",block<<12,page); - //inv_debug("invalid_code[block]=%d\n",invalid_code[block]); - u_int first,last; - first=last=page; - struct ll_entry *head; - head=jump_dirty[vpage]; - //printf("page=%d vpage=%d\n",page,vpage); - while(head!=NULL) { - u_int start,end; - if(vpage>2047||(head->vaddr>>12)==block) { // Ignore vaddr hash collision - get_bounds((int)head->addr,&start,&end); - //printf("start: %x end: %x\n",start,end); - if(page<2048&&start>=0x80000000&&end<0x80000000+RAM_SIZE) { - if(((start-(u_int)rdram)>>12)<=page&&((end-1-(u_int)rdram)>>12)>=page) { - if((((start-(u_int)rdram)>>12)&2047)>12)&2047; - if((((end-1-(u_int)rdram)>>12)&2047)>last) last=((end-1-(u_int)rdram)>>12)&2047; - } - } -#ifndef DISABLE_TLB - if(page<2048&&(signed int)start>=(signed int)0xC0000000&&(signed int)end>=(signed int)0xC0000000) { - if(((start+memory_map[start>>12]-(u_int)rdram)>>12)<=page&&((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)>=page) { - if((((start+memory_map[start>>12]-(u_int)rdram)>>12)&2047)>12]-(u_int)rdram)>>12)&2047; - if((((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047)>last) last=((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047; - } - } -#endif - } - head=head->next; - } //printf("first=%d last=%d\n",first,last); invalidate_page(page); assert(first+5>page); // NB: this assumes MAXBLOCK<=4096 (4 pages) @@ -1178,9 +1177,6 @@ void invalidate_block(u_int block) // Don't trap writes invalid_code[block]=1; -#ifdef PCSX - invalid_code[((u_int)0x80000000>>12)|page]=1; -#endif #ifndef DISABLE_TLB // If there is a valid TLB entry for this page, remove write protect if(tlb_LUT_w[block]) { @@ -1198,10 +1194,96 @@ void invalidate_block(u_int block) memset(mini_ht,-1,sizeof(mini_ht)); #endif } + +void invalidate_block(u_int block) +{ + u_int page=get_page(block<<12); + u_int vpage=get_vpage(block<<12); + inv_debug("INVALIDATE: %x (%d)\n",block<<12,page); + //inv_debug("invalid_code[block]=%d\n",invalid_code[block]); + u_int first,last; + first=last=page; + struct ll_entry *head; + head=jump_dirty[vpage]; + //printf("page=%d vpage=%d\n",page,vpage); + while(head!=NULL) { + u_int start,end; + if(vpage>2047||(head->vaddr>>12)==block) { // Ignore vaddr hash collision + get_bounds((int)head->addr,&start,&end); + //printf("start: %x end: %x\n",start,end); + if(page<2048&&start>=0x80000000&&end<0x80000000+RAM_SIZE) { + if(((start-(u_int)rdram)>>12)<=page&&((end-1-(u_int)rdram)>>12)>=page) { + if((((start-(u_int)rdram)>>12)&2047)>12)&2047; + if((((end-1-(u_int)rdram)>>12)&2047)>last) last=((end-1-(u_int)rdram)>>12)&2047; + } + } +#ifndef DISABLE_TLB + if(page<2048&&(signed int)start>=(signed int)0xC0000000&&(signed int)end>=(signed int)0xC0000000) { + if(((start+memory_map[start>>12]-(u_int)rdram)>>12)<=page&&((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)>=page) { + if((((start+memory_map[start>>12]-(u_int)rdram)>>12)&2047)>12]-(u_int)rdram)>>12)&2047; + if((((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047)>last) last=((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047; + } + } +#endif + } + head=head->next; + } + invalidate_block_range(block,first,last); +} + void invalidate_addr(u_int addr) { +#ifdef PCSX + //static int rhits; + // this check is done by the caller + //if (inv_code_start<=addr&&addr<=inv_code_end) { rhits++; return; } + u_int page=get_vpage(addr); + if(page<2048) { // RAM + struct ll_entry *head; + u_int addr_min=~0, addr_max=0; + int mask=RAM_SIZE-1; + int pg1; + inv_code_start=addr&~0xfff; + inv_code_end=addr|0xfff; + pg1=page; + if (pg1>0) { + // must check previous page too because of spans.. + pg1--; + inv_code_start-=0x1000; + } + for(;pg1<=page;pg1++) { + for(head=jump_dirty[pg1];head!=NULL;head=head->next) { + u_int start,end; + get_bounds((int)head->addr,&start,&end); + if((start&mask)<=(addr&mask)&&(addr&mask)<(end&mask)) { + if(startaddr_max) addr_max=end; + } + else if(addrinv_code_start) + inv_code_start=end; + } + } + } + if (addr_min!=~0) { + inv_debug("INV ADDR: %08x hit %08x-%08x\n", addr, addr_min, addr_max); + inv_code_start=inv_code_end=~0; + invalidate_block_range(addr>>12,(addr_min&mask)>>12,(addr_max&mask)>>12); + return; + } + else { + inv_debug("INV ADDR: %08x miss, inv %08x-%08x, sk %d\n", addr, inv_code_start, inv_code_end, 0); + return; + } + } +#endif invalidate_block(addr>>12); } + // This is called when loading a save state. // Anything could have changed, so invalidate everything. void invalidate_all_pages() @@ -1240,6 +1322,8 @@ void add_link(u_int vaddr,void *src) { u_int page=get_page(vaddr); inv_debug("add_link: %x -> %x (%d)\n",(int)src,vaddr,page); + int *ptr=(int *)(src+4); + assert((*ptr&0x0fff0000)==0x059f0000); ll_add(jump_out+page,vaddr,src); //int ptr=get_pointer(src); //inv_debug("add_link: Pointer is to %x\n",(int)ptr); @@ -1269,11 +1353,13 @@ void clean_blocks(u_int page) inv|=invalid_code[i]; } } +#ifndef DISABLE_TLB if((signed int)head->vaddr>=(signed int)0xC0000000) { u_int addr = (head->vaddr+(memory_map[head->vaddr>>12]<<2)); //printf("addr=%x start=%x end=%x\n",addr,start,end); if(addr=end) inv=1; } +#endif else if((signed int)head->vaddr>=(signed int)0x80000000+RAM_SIZE) { inv=1; } @@ -1326,8 +1412,6 @@ void mov_alloc(struct regstat *current,int i) void shiftimm_alloc(struct regstat *current,int i) { - clear_const(current,rs1[i]); - clear_const(current,rt1[i]); if(opcode2[i]<=0x3) // SLL/SRL/SRA { if(rt1[i]) { @@ -1336,8 +1420,21 @@ void shiftimm_alloc(struct regstat *current,int i) alloc_reg(current,i,rt1[i]); current->is32|=1LL<>imm[i]); + if(opcode2[i]==0x03) set_const(current,rt1[i],v>>imm[i]); + } + else clear_const(current,rt1[i]); } } + else + { + clear_const(current,rs1[i]); + clear_const(current,rt1[i]); + } + if(opcode2[i]>=0x38&&opcode2[i]<=0x3b) // DSLL/DSRL/DSRA { if(rt1[i]) { @@ -2623,7 +2720,7 @@ void shiftimm_assemble(int i,struct regstat *i_regs) t=get_reg(i_regs->regmap,rt1[i]); s=get_reg(i_regs->regmap,rs1[i]); //assert(t>=0); - if(t>=0){ + if(t>=0&&!((i_regs->isconst>>t)&1)){ if(rs1[i]==0) { emit_zeroreg(t); @@ -2817,23 +2914,7 @@ void load_assemble(int i,struct regstat *i_regs) if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE) #endif { - #ifdef PCSX - if(sp_in_mirror&&rs1[i]==29) { - emit_andimm(addr,~0x00e00000,HOST_TEMPREG); - emit_cmpimm(HOST_TEMPREG,RAM_SIZE); - fastload_reg_override=HOST_TEMPREG; - } - else - #endif - emit_cmpimm(addr,RAM_SIZE); - jaddr=(int)out; - #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK - // Hint to branch predictor that the branch is unlikely to be taken - if(rs1[i]>=28) - emit_jno_unlikely(0); - else - #endif - emit_jno(0); + jaddr=emit_fastpath_cmp_jump(i,addr,&fastload_reg_override); } } }else{ // using tlb @@ -3118,14 +3199,7 @@ void store_assemble(int i,struct regstat *i_regs) else addr=s; if(!using_tlb) { if(!c) { - #ifdef PCSX - if(sp_in_mirror&&rs1[i]==29) { - emit_andimm(addr,~0x00e00000,HOST_TEMPREG); - emit_cmpimm(HOST_TEMPREG,RAM_SIZE); - faststore_reg_override=HOST_TEMPREG; - } - else - #endif + #ifndef PCSX #ifdef R29_HACK // Strmnnrmn's speed hack if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE) @@ -3148,6 +3222,9 @@ void store_assemble(int i,struct regstat *i_regs) #endif emit_jno(0); } + #else + jaddr=emit_fastpath_cmp_jump(i,addr,&faststore_reg_override); + #endif } }else{ // using tlb int x=0; @@ -3224,7 +3301,15 @@ void store_assemble(int i,struct regstat *i_regs) } type=STORED_STUB; } - if(!using_tlb) { +#ifdef PCSX + if(jaddr) { + // PCSX store handlers don't check invcode again + reglist|=1<waswritten&(1<waswritten&(1<regmap,ROREG); if(map<0) map=HOST_TEMPREG; @@ -3676,7 +3761,7 @@ void c1ls_assemble(int i,struct regstat *i_regs) emit_writedword_indexed_tlb(th,tl,0,offset||c||s<0?temp:s,map,temp); type=STORED_STUB; } - if(!using_tlb) { + if(!using_tlb&&!(i_regs->waswritten&(1<>16)&0x1f; s=get_reg(i_regs->regmap,rs1[i]); @@ -3772,27 +3858,30 @@ void c2ls_assemble(int i,struct regstat *i_regs) } else { if(!c) { - emit_cmpimm(offset||c||s<0?ar:s,RAM_SIZE); - jaddr2=(int)out; - emit_jno(0); + jaddr2=emit_fastpath_cmp_jump(i,ar,&fastio_reg_override); } if (opcode[i]==0x32) { // LWC2 #ifdef HOST_IMM_ADDR32 if(c) emit_readword_tlb(constmap[i][s]+offset,-1,tl); else #endif - emit_readword_indexed(0,ar,tl); + int a=ar; + if(fastio_reg_override) a=fastio_reg_override; + emit_readword_indexed(0,a,tl); } if (opcode[i]==0x3a) { // SWC2 #ifdef DESTRUCTIVE_SHIFT if(!offset&&!c&&s>=0) emit_mov(s,ar); #endif - emit_writeword_indexed(tl,0,ar); + int a=ar; + if(fastio_reg_override) a=fastio_reg_override; + emit_writeword_indexed(tl,0,a); } } if(jaddr2) add_stub(type,jaddr2,(int)out,i,ar,(int)i_regs,ccadj[i],reglist); - if (opcode[i]==0x3a) { // SWC2 + if(opcode[i]==0x3a) // SWC2 + if(!(i_regs->waswritten&(1<regmap,INVCP); assert(ir>=0); @@ -3865,7 +3954,7 @@ void syscall_assemble(int i,struct regstat *i_regs) assert(ccreg==HOST_CCREG); assert(!is_delayslot); emit_movimm(start+i*4,EAX); // Get PC - emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle... + emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle... emit_jmp((int)jump_syscall_hle); // XXX } @@ -3876,7 +3965,7 @@ void hlecall_assemble(int i,struct regstat *i_regs) assert(!is_delayslot); emit_movimm(start+i*4+4,0); // Get PC emit_movimm((int)psxHLEt[source[i]&7],1); - emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // XXX + emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // XXX emit_jmp((int)jump_hlecall); } @@ -3886,12 +3975,13 @@ void intcall_assemble(int i,struct regstat *i_regs) assert(ccreg==HOST_CCREG); assert(!is_delayslot); emit_movimm(start+i*4,0); // Get PC - emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); + emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); emit_jmp((int)jump_intcall); } void ds_assemble(int i,struct regstat *i_regs) { + speculate_register_values(i); is_delayslot=1; switch(itype[i]) { case ALU: @@ -4151,6 +4241,7 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) // printf("poor load scheduling!\n"); } else if(c) { +#ifndef DISABLE_TLB if(rm>=0) { if(!entry||entry[rm]!=mgr) { if(itype[i]==STORE||itype[i]==STORELR||(opcode[i]&0x3b)==0x39||(opcode[i]&0x3b)==0x3a) { @@ -4165,6 +4256,7 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) } } } +#endif if(rs1[i]!=rt1[i]||itype[i]!=LOAD) { if(!entry||entry[ra]!=agr) { if (opcode[i]==0x22||opcode[i]==0x26) { @@ -4177,6 +4269,7 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) (using_tlb&&((signed int)constmap[i][rs]+offset)>=(signed int)0xC0000000)) #endif emit_movimm(constmap[i][rs]+offset,ra); + regs[i].loadedconst|=1<regmap,agr); @@ -4237,6 +4330,7 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) (using_tlb&&((signed int)constmap[i+1][rs]+offset)>=(signed int)0xC0000000)) #endif emit_movimm(constmap[i+1][rs]+offset,ra); + regs[i+1].loadedconst|=1<=0&&((regs[i-1].isconst>>hr)&1)&&pre[hr]==regmap[hr] + &®map[hr]==regs[i-1].regmap[hr]&&((regs[i-1].loadedconst>>hr)&1)) + { + regs[i].loadedconst|=1<=0) { //if(entry[hr]!=regmap[hr]) { - if(i==0||!((regs[i-1].isconst>>hr)&1)||pre[hr]!=regmap[hr]||bt[i]) { + if(!((regs[i].loadedconst>>hr)&1)) { if(((regs[i].isconst>>hr)&1)&®map[hr]<64&®map[hr]>0) { - int value; + int value,similar=0; if(get_final_value(hr,i,&value)) { - if(value==0) { + // see if some other register has similar value + for(hr2=0;hr2>hr2)&1)) { + if(is_similar_value(value,constmap[i][hr2])) { + similar=1; + break; + } + } + } + if(similar) { + int value2; + if(get_final_value(hr2,i,&value2)) // is this needed? + emit_movimm_from(value2,hr2,value,hr); + else + emit_movimm(value,hr); + } + else if(value==0) { emit_zeroreg(hr); } else { emit_movimm(value,hr); } } + regs[i].loadedconst|=1<>2].regmap_entry); @@ -5208,7 +5331,7 @@ void ujump_assemble(int i,struct regstat *i_regs) if(rt1[i]==31&&temp>=0) emit_prefetchreg(temp); #endif do_cc(i,branch_regs[i].regmap,&adj,ba[i],TAKEN,0); - if(adj) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc); + if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); if(internal_branch(branch_regs[i].is32,ba[i])) assem_debug("branch: internal\n"); @@ -5321,8 +5444,14 @@ void rjump_assemble(int i,struct regstat *i_regs) //do_cc(i,branch_regs[i].regmap,&adj,-1,TAKEN); //if(adj) emit_addimm(cc,2*(ccadj[i]+2-adj),cc); // ??? - Shouldn't happen //assert(adj==0); - emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),HOST_CCREG); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); add_stub(CC_STUB,(int)out,jump_vaddr_reg[rs],0,i,-1,TAKEN,0); +#ifdef PCSX + if(itype[i+1]==COP0&&(source[i+1]&0x3f)==0x10) + // special case for RFE + emit_jmp(0); + else +#endif emit_jns(0); //load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,-1); #ifdef USE_MINI_HT @@ -5452,7 +5581,7 @@ void cjump_assemble(int i,struct regstat *i_regs) if(unconditional) { do_cc(i,branch_regs[i].regmap,&adj,ba[i],TAKEN,0); if(i!=(ba[i]-start)>>2 || source[i+1]!=0) { - if(adj) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc); + if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); if(internal) assem_debug("branch: internal\n"); @@ -5471,7 +5600,7 @@ void cjump_assemble(int i,struct regstat *i_regs) } } else if(nop) { - emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),cc); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); int jaddr=(int)out; emit_jns(0); add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0); @@ -5479,7 +5608,7 @@ void cjump_assemble(int i,struct regstat *i_regs) else { int taken=0,nottaken=0,nottaken1=0; do_cc(i,branch_regs[i].regmap,&adj,-1,0,invert); - if(adj&&!invert) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc); + if(adj&&!invert) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); if(!only32) { assert(s1h>=0); @@ -5571,7 +5700,7 @@ void cjump_assemble(int i,struct regstat *i_regs) #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK if(match&&(!internal||!is_ds[(ba[i]-start)>>2])) { if(adj) { - emit_addimm(cc,-CLOCK_DIVIDER*adj,cc); + emit_addimm(cc,-CLOCK_ADJUST(adj),cc); add_to_linker((int)out,ba[i],internal); }else{ emit_addnop(13); @@ -5581,7 +5710,7 @@ void cjump_assemble(int i,struct regstat *i_regs) }else #endif { - if(adj) emit_addimm(cc,-CLOCK_DIVIDER*adj,cc); + if(adj) emit_addimm(cc,-CLOCK_ADJUST(adj),cc); store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); if(internal) @@ -5601,7 +5730,7 @@ void cjump_assemble(int i,struct regstat *i_regs) if(nottaken1) set_jump_target(nottaken1,(int)out); if(adj) { - if(!invert) emit_addimm(cc,CLOCK_DIVIDER*adj,cc); + if(!invert) emit_addimm(cc,CLOCK_ADJUST(adj),cc); } } // (!unconditional) } // if(ooo) @@ -5705,7 +5834,7 @@ void cjump_assemble(int i,struct regstat *i_regs) store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); do_cc(i,i_regmap,&adj,ba[i],TAKEN,0); assem_debug("cycle count (adj)\n"); - if(adj) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc); + if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); if(internal) assem_debug("branch: internal\n"); @@ -5737,7 +5866,7 @@ void cjump_assemble(int i,struct regstat *i_regs) if(cc==-1&&!likely[i]) { // Cycle count isn't in a register, temporarily load it then write it out emit_loadreg(CCREG,HOST_CCREG); - emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),HOST_CCREG); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); int jaddr=(int)out; emit_jns(0); add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0); @@ -5746,7 +5875,7 @@ void cjump_assemble(int i,struct regstat *i_regs) else{ cc=get_reg(i_regmap,CCREG); assert(cc==HOST_CCREG); - emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),cc); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); int jaddr=(int)out; emit_jns(0); add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,likely[i]?NULLDS:NOTTAKEN,0); @@ -5837,7 +5966,7 @@ void sjump_assemble(int i,struct regstat *i_regs) if(unconditional) { do_cc(i,branch_regs[i].regmap,&adj,ba[i],TAKEN,0); if(i!=(ba[i]-start)>>2 || source[i+1]!=0) { - if(adj) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc); + if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); if(internal) assem_debug("branch: internal\n"); @@ -5856,7 +5985,7 @@ void sjump_assemble(int i,struct regstat *i_regs) } } else if(nevertaken) { - emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),cc); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); int jaddr=(int)out; emit_jns(0); add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0); @@ -5864,7 +5993,7 @@ void sjump_assemble(int i,struct regstat *i_regs) else { int nottaken=0; do_cc(i,branch_regs[i].regmap,&adj,-1,0,invert); - if(adj&&!invert) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc); + if(adj&&!invert) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); if(!only32) { assert(s1h>=0); @@ -5922,7 +6051,7 @@ void sjump_assemble(int i,struct regstat *i_regs) #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK if(match&&(!internal||!is_ds[(ba[i]-start)>>2])) { if(adj) { - emit_addimm(cc,-CLOCK_DIVIDER*adj,cc); + emit_addimm(cc,-CLOCK_ADJUST(adj),cc); add_to_linker((int)out,ba[i],internal); }else{ emit_addnop(13); @@ -5932,7 +6061,7 @@ void sjump_assemble(int i,struct regstat *i_regs) }else #endif { - if(adj) emit_addimm(cc,-CLOCK_DIVIDER*adj,cc); + if(adj) emit_addimm(cc,-CLOCK_ADJUST(adj),cc); store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); if(internal) @@ -5951,7 +6080,7 @@ void sjump_assemble(int i,struct regstat *i_regs) } if(adj) { - if(!invert) emit_addimm(cc,CLOCK_DIVIDER*adj,cc); + if(!invert) emit_addimm(cc,CLOCK_ADJUST(adj),cc); } } // (!unconditional) } // if(ooo) @@ -6034,7 +6163,7 @@ void sjump_assemble(int i,struct regstat *i_regs) store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); do_cc(i,i_regmap,&adj,ba[i],TAKEN,0); assem_debug("cycle count (adj)\n"); - if(adj) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc); + if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); if(internal) assem_debug("branch: internal\n"); @@ -6065,7 +6194,7 @@ void sjump_assemble(int i,struct regstat *i_regs) if(cc==-1&&!likely[i]) { // Cycle count isn't in a register, temporarily load it then write it out emit_loadreg(CCREG,HOST_CCREG); - emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),HOST_CCREG); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); int jaddr=(int)out; emit_jns(0); add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0); @@ -6074,7 +6203,7 @@ void sjump_assemble(int i,struct regstat *i_regs) else{ cc=get_reg(i_regmap,CCREG); assert(cc==HOST_CCREG); - emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),cc); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); int jaddr=(int)out; emit_jns(0); add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,likely[i]?NULLDS:NOTTAKEN,0); @@ -6140,7 +6269,7 @@ void fjump_assemble(int i,struct regstat *i_regs) assem_debug("cycle count (adj)\n"); if(1) { int nottaken=0; - if(adj&&!invert) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc); + if(adj&&!invert) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); if(1) { assert(fs>=0); emit_testimm(fs,0x800000); @@ -6167,7 +6296,7 @@ void fjump_assemble(int i,struct regstat *i_regs) } // if(!only32) if(invert) { - if(adj) emit_addimm(cc,-CLOCK_DIVIDER*adj,cc); + if(adj) emit_addimm(cc,-CLOCK_ADJUST(adj),cc); #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK else if(match) emit_addnop(13); #endif @@ -6188,7 +6317,7 @@ void fjump_assemble(int i,struct regstat *i_regs) } if(adj) { - if(!invert) emit_addimm(cc,CLOCK_DIVIDER*adj,cc); + if(!invert) emit_addimm(cc,CLOCK_ADJUST(adj),cc); } } // (!unconditional) } // if(ooo) @@ -6240,7 +6369,7 @@ void fjump_assemble(int i,struct regstat *i_regs) store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); do_cc(i,i_regmap,&adj,ba[i],TAKEN,0); assem_debug("cycle count (adj)\n"); - if(adj) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc); + if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); if(internal) assem_debug("branch: internal\n"); @@ -6270,7 +6399,7 @@ void fjump_assemble(int i,struct regstat *i_regs) if(cc==-1&&!likely[i]) { // Cycle count isn't in a register, temporarily load it then write it out emit_loadreg(CCREG,HOST_CCREG); - emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),HOST_CCREG); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); int jaddr=(int)out; emit_jns(0); add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0); @@ -6279,7 +6408,7 @@ void fjump_assemble(int i,struct regstat *i_regs) else{ cc=get_reg(i_regmap,CCREG); assert(cc==HOST_CCREG); - emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),cc); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); int jaddr=(int)out; emit_jns(0); add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,likely[i]?NULLDS:NOTTAKEN,0); @@ -6352,7 +6481,7 @@ static void pagespan_assemble(int i,struct regstat *i_regs) if((opcode[i]&0x2e)==4||opcode[i]==0x11) { // BEQ/BNE/BEQL/BNEL/BC1 load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,CCREG,CCREG); } - emit_addimm(HOST_CCREG,CLOCK_DIVIDER*(ccadj[i]+2),HOST_CCREG); + emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); if(opcode[i]==2) // J { unconditional=1; @@ -6650,16 +6779,22 @@ static void pagespan_ds() void unneeded_registers(int istart,int iend,int r) { int i; - uint64_t u,uu,b,bu; - uint64_t temp_u,temp_uu; + uint64_t u,uu,gte_u,b,bu,gte_bu; + uint64_t temp_u,temp_uu,temp_gte_u=0; uint64_t tdep; + uint64_t gte_u_unknown=0; + if(new_dynarec_hacks&NDHACK_GTE_UNNEEDED) + gte_u_unknown=~0ll; if(iend==slen-1) { u=1;uu=1; + gte_u=gte_u_unknown; }else{ u=unneeded_reg[iend+1]; uu=unneeded_reg_upper[iend+1]; u=1;uu=1; + gte_u=gte_unneeded[iend+1]; } + for (i=iend;i>=istart;i--) { //printf("unneeded registers i=%d (%d,%d) r=%d\n",i,istart,iend,r); @@ -6673,6 +6808,7 @@ void unneeded_registers(int istart,int iend,int r) // Branch out of this block, flush all regs u=1; uu=1; + gte_u=gte_u_unknown; /* Hexagon hack if(itype[i]==UJUMP&&rt1[i]==31) { @@ -6704,17 +6840,21 @@ void unneeded_registers(int istart,int iend,int r) uu&=~((1LL<>rt1[i+1])&1; @@ -6741,17 +6883,21 @@ void unneeded_registers(int istart,int iend,int r) temp_uu&=~((1LL<>rt1[i])&1; @@ -6761,8 +6907,11 @@ void unneeded_registers(int istart,int iend,int r) temp_uu&=~((1LL<>2]=1; unneeded_reg_upper[(ba[i]-start)>>2]=1; + gte_unneeded[(ba[i]-start)>>2]=gte_u_unknown; } } /*else*/ if(1) { if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000) @@ -6777,6 +6927,7 @@ void unneeded_registers(int istart,int iend,int r) // Unconditional branch u=unneeded_reg[(ba[i]-start)>>2]; uu=unneeded_reg_upper[(ba[i]-start)>>2]; + gte_u=gte_unneeded[(ba[i]-start)>>2]; branch_unneeded_reg[i]=u; branch_unneeded_reg_upper[i]=uu; //u=1; @@ -6791,10 +6942,13 @@ void unneeded_registers(int istart,int iend,int r) uu&=~((1LL<>2]; bu=unneeded_reg_upper[(ba[i]-start)>>2]; + gte_bu=gte_unneeded[(ba[i]-start)>>2]; branch_unneeded_reg[i]=b; branch_unneeded_reg_upper[i]=bu; //b=1; @@ -6809,20 +6963,25 @@ void unneeded_registers(int istart,int iend,int r) bu&=~((1LL<>2; for(n=526336;n<1048576;n++) // 0x80800000 .. 0xFFFFFFFF memory_map[n]=-1; +#endif for(n=0;n<4096;n++) ll_clear(jump_in+n); for(n=0;n<4096;n++) ll_clear(jump_out+n); for(n=0;n<4096;n++) ll_clear(jump_dirty+n); @@ -7822,6 +7990,7 @@ void new_dynarec_init() fake_pc.f.r.rd=&readmem_dword; #endif int n; + cycle_multiplier=200; new_dynarec_clear_full(); #ifdef HOST_IMM8 // Copy this into local area so we don't have to put it in every literal pool @@ -7904,12 +8073,8 @@ int new_recompile_block(int addr) //rlist(); start = (u_int)addr&~3; //assert(((u_int)addr&1)==0); + new_dynarec_did_compile=1; #ifdef PCSX - if(!sp_in_mirror&&(signed int)(psxRegs.GPR.n.sp&0xffe00000)>0x80200000&& - 0x10000<=psxRegs.GPR.n.sp&&(psxRegs.GPR.n.sp&~0xe0e00000)>21)&0x1f; - if (source[i]&0x3f) { + //if (op2 & 0x10) { + if (source[i]&0x3f) { // use this hack to support old savestates with patched gte insns if (gte_handlers[source[i]&0x3f]!=NULL) { - snprintf(insn[i], sizeof(insn[i]), "COP2 %x", source[i]&0x3f); + if (gte_regnames[source[i]&0x3f]!=NULL) + strcpy(insn[i],gte_regnames[source[i]&0x3f]); + else + snprintf(insn[i], sizeof(insn[i]), "COP2 %x", source[i]&0x3f); type=C2OP; } } @@ -8314,6 +8483,7 @@ int new_recompile_block(int addr) us2[i]=0; dep1[i]=0; dep2[i]=0; + gte_rs[i]=gte_rt[i]=0; switch(type) { case LOAD: rs1[i]=(source[i]>>21)&0x1f; @@ -8477,7 +8647,6 @@ int new_recompile_block(int addr) if(op2==16) if((source[i]&0x3f)==0x18) rs2[i]=CCREG; // ERET break; case COP1: - case COP2: rs1[i]=0; rs2[i]=0; rt1[i]=0; @@ -8487,6 +8656,23 @@ int new_recompile_block(int addr) if(op2==5) us1[i]=rs1[i]; // DMTC1 rs2[i]=CSREG; break; + case COP2: + rs1[i]=0; + rs2[i]=0; + rt1[i]=0; + rt2[i]=0; + if(op2<3) rt1[i]=(source[i]>>16)&0x1F; // MFC2/CFC2 + if(op2>3) rs1[i]=(source[i]>>16)&0x1F; // MTC2/CTC2 + rs2[i]=CSREG; + int gr=(source[i]>>11)&0x1F; + switch(op2) + { + case 0x00: gte_rs[i]=1ll<>21)&0x1F; rs2[i]=CSREG; @@ -8500,6 +8686,23 @@ int new_recompile_block(int addr) rt1[i]=0; rt2[i]=0; imm[i]=(short)source[i]; + if(op==0x32) gte_rt[i]=1ll<<((source[i]>>16)&0x1F); // LWC2 + else gte_rs[i]=1ll<<((source[i]>>16)&0x1F); // SWC2 + break; + case C2OP: + rs1[i]=0; + rs2[i]=0; + rt1[i]=0; + rt2[i]=0; + gte_rs[i]=gte_reg_reads[source[i]&0x3f]; + gte_rt[i]=gte_reg_writes[source[i]&0x3f]; + gte_rt[i]|=1ll<<63; // every op changes flags + if((source[i]&0x3f)==GTE_MVMVA) { + int v = (source[i] >> 15) & 3; + gte_rs[i]&=~0xe3fll; + if(v==3) gte_rs[i]|=0xe00ll; + else gte_rs[i]|=3ll<<(v*2); + } break; case FLOAT: case FCONV: @@ -8633,6 +8836,7 @@ int new_recompile_block(int addr) dirty_reg(¤t,CCREG); current.isconst=0; current.wasconst=0; + current.waswritten=0; int ds=0; int cc=0; int hr=-1; @@ -8661,6 +8865,7 @@ int new_recompile_block(int addr) if(current.regmap[hr]==0) current.regmap[hr]=-1; } current.isconst=0; + current.waswritten=0; } if(i>1) { @@ -8725,6 +8930,7 @@ int new_recompile_block(int addr) regs[i].wasconst=current.isconst; regs[i].was32=current.is32; regs[i].wasdirty=current.dirty; + regs[i].loadedconst=0; #if defined(DESTRUCTIVE_WRITEBACK) && !defined(FORCE32) // To change a dirty register from 32 to 64 bits, we must write // it out during the previous cycle (for branches, 2 cycles) @@ -9287,6 +9493,14 @@ int new_recompile_block(int addr) } memcpy(regs[i].regmap,current.regmap,sizeof(current.regmap)); } + + if(i>0&&(itype[i-1]==STORE||itype[i-1]==STORELR||(itype[i-1]==C2LS&&opcode[i-1]==0x3a))&&(u_int)imm[i-1]<0x800) + current.waswritten|=1<=0x800) + current.waswritten&=~(1<0) { @@ -9307,7 +9521,7 @@ int new_recompile_block(int addr) branch_regs[i-1].is32|=1LL<<31; } memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap)); - memcpy(constmap[i],constmap[i-1],sizeof(current.constmap)); + memcpy(constmap[i],constmap[i-1],sizeof(current_constmap)); break; case RJUMP: memcpy(&branch_regs[i-1],¤t,sizeof(current)); @@ -9332,7 +9546,7 @@ int new_recompile_block(int addr) } #endif memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap)); - memcpy(constmap[i],constmap[i-1],sizeof(current.constmap)); + memcpy(constmap[i],constmap[i-1],sizeof(current_constmap)); break; case CJUMP: if((opcode[i-1]&0x3E)==4) // BEQ/BNE @@ -9368,7 +9582,7 @@ int new_recompile_block(int addr) branch_regs[i-1].isconst=0; branch_regs[i-1].wasconst=0; memcpy(&branch_regs[i-1].regmap_entry,¤t.regmap,sizeof(current.regmap)); - memcpy(constmap[i],constmap[i-1],sizeof(current.constmap)); + memcpy(constmap[i],constmap[i-1],sizeof(current_constmap)); } else if((opcode[i-1]&0x3E)==6) // BLEZ/BGTZ @@ -9401,7 +9615,7 @@ int new_recompile_block(int addr) branch_regs[i-1].isconst=0; branch_regs[i-1].wasconst=0; memcpy(&branch_regs[i-1].regmap_entry,¤t.regmap,sizeof(current.regmap)); - memcpy(constmap[i],constmap[i-1],sizeof(current.constmap)); + memcpy(constmap[i],constmap[i-1],sizeof(current_constmap)); } else // Alloc the delay slot in case the branch is taken @@ -9467,7 +9681,7 @@ int new_recompile_block(int addr) branch_regs[i-1].isconst=0; branch_regs[i-1].wasconst=0; memcpy(&branch_regs[i-1].regmap_entry,¤t.regmap,sizeof(current.regmap)); - memcpy(constmap[i],constmap[i-1],sizeof(current.constmap)); + memcpy(constmap[i],constmap[i-1],sizeof(current_constmap)); } else // Alloc the delay slot in case the branch is taken @@ -9581,7 +9795,12 @@ int new_recompile_block(int addr) { cc=0; } -#ifdef PCSX +#if defined(PCSX) && !defined(DRC_DBG) + else if(itype[i]==C2OP&>e_cycletab[source[i]&0x3f]>2) + { + // GTE runs in parallel until accessed, divide by 2 for a rough guess + cc+=gte_cycletab[source[i]&0x3f]/2; + } else if(/*itype[i]==LOAD||*/itype[i]==STORE||itype[i]==C1LS) // load causes weird timing issues { cc+=2; // 2 cycle penalty (after CLOCK_DIVIDER) @@ -9601,7 +9820,7 @@ int new_recompile_block(int addr) regs[i].is32=current.is32; regs[i].dirty=current.dirty; regs[i].isconst=current.isconst; - memcpy(constmap[i],current.constmap,sizeof(current.constmap)); + memcpy(constmap[i],current_constmap,sizeof(current_constmap)); } for(hr=0;hr=0) { @@ -9611,6 +9830,7 @@ int new_recompile_block(int addr) } } if(current.regmap[HOST_BTREG]==BTREG) current.regmap[HOST_BTREG]=-1; + regs[i].waswritten=current.waswritten; } /* Pass 4 - Cull unused host registers */ @@ -10755,9 +10975,9 @@ int new_recompile_block(int addr) if(itype[slen-1]==SPAN) { bt[slen-1]=1; // Mark as a branch target so instruction can restart after exception } - + +#ifdef DISASM /* Debug/disassembly */ - if((void*)assem_debug==(void*)printf) for(i=0;i>16)!=0x1000)) { @@ -11145,7 +11367,7 @@ int new_recompile_block(int addr) store_regs_bt(regs[i-1].regmap,regs[i-1].is32,regs[i-1].dirty,start+i*4); if(regs[i-1].regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); - emit_addimm(HOST_CCREG,CLOCK_DIVIDER*(ccadj[i-1]+1),HOST_CCREG); + emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i-1]+1),HOST_CCREG); } else if(!likely[i-2]) { @@ -11168,7 +11390,7 @@ int new_recompile_block(int addr) store_regs_bt(regs[i-1].regmap,regs[i-1].is32,regs[i-1].dirty,start+i*4); if(regs[i-1].regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); - emit_addimm(HOST_CCREG,CLOCK_DIVIDER*(ccadj[i-1]+1),HOST_CCREG); + emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i-1]+1),HOST_CCREG); add_to_linker((int)out,start+i*4,0); emit_jmp(0); } @@ -11323,11 +11545,14 @@ int new_recompile_block(int addr) } #endif } + inv_code_start=inv_code_end=~0; #ifdef PCSX - // PCSX maps all RAM mirror invalid_code tests to 0x80000000..0x80000000+RAM_SIZE + // for PCSX we need to mark all mirrors too if(get_page(start)<(RAM_SIZE>>12)) for(i=start>>12;i<=(start+slen*4)>>12;i++) - invalid_code[((u_int)0x80000000>>12)|i]=0; + invalid_code[((u_int)0x00000000>>12)|(i&0x1ff)]= + invalid_code[((u_int)0x80000000>>12)|(i&0x1ff)]= + invalid_code[((u_int)0xa0000000>>12)|(i&0x1ff)]=0; #endif /* Pass 10 - Free memory by expiring oldest blocks */