X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=blobdiff_plain;f=libpcsxcore%2Fnew_dynarec%2Fnew_dynarec.c;h=59d42080cb7cac75c0dc8f2923f68f2a46549595;hp=74451c7d7215b498410163852c0af90e99d2c7ab;hb=4a35de071887026bb6dcd6b852738a1866959df7;hpb=76f71c2748608a51e1f9a49273eb3ff58e715700 diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 74451c7d..59d42080 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -21,10 +21,15 @@ #include #include //include for uint64_t #include +#include #include "emu_if.h" //emulator interface -#include +//#define DISASM +//#define assem_debug printf +//#define inv_debug printf +#define assem_debug(...) +#define inv_debug(...) #ifdef __i386__ #include "assem_x86.h" @@ -38,7 +43,6 @@ #define MAXBLOCK 4096 #define MAX_OUTPUT_BLOCK_SIZE 262144 -#define CLOCK_DIVIDER 2 struct regstat { @@ -52,7 +56,8 @@ struct regstat uint64_t uu; u_int wasconst; u_int isconst; - uint64_t constmap[HOST_REGS]; + u_int loadedconst; // host regs that have constants loaded + u_int waswritten; // MIPS regs that were used as store base before }; struct ll_entry @@ -80,6 +85,14 @@ struct ll_entry u_char dep1[MAXBLOCK]; u_char dep2[MAXBLOCK]; u_char lt1[MAXBLOCK]; + static uint64_t gte_rs[MAXBLOCK]; // gte: 32 data and 32 ctl regs + static uint64_t gte_rt[MAXBLOCK]; + static uint64_t gte_unneeded[MAXBLOCK]; + static u_int smrv[32]; // speculated MIPS register values + static u_int smrv_strong; // mask or regs that are likely to have correct values + static u_int smrv_weak; // same, but somewhat less likely + static u_int smrv_strong_next; // same, but after current insn executes + static u_int smrv_weak_next; int imm[MAXBLOCK]; u_int ba[MAXBLOCK]; char likely[MAXBLOCK]; @@ -92,11 +105,10 @@ struct ll_entry uint64_t p32[MAXBLOCK]; uint64_t pr32[MAXBLOCK]; signed char regmap_pre[MAXBLOCK][HOST_REGS]; - signed char regmap[MAXBLOCK][HOST_REGS]; - signed char regmap_entry[MAXBLOCK][HOST_REGS]; - uint64_t constmap[MAXBLOCK][HOST_REGS]; - struct regstat regs[MAXBLOCK]; - struct regstat branch_regs[MAXBLOCK]; + static uint64_t current_constmap[HOST_REGS]; + static uint64_t constmap[MAXBLOCK][HOST_REGS]; + static struct regstat regs[MAXBLOCK]; + static struct regstat branch_regs[MAXBLOCK]; signed char minimum_free_regs[MAXBLOCK]; u_int needed_reg[MAXBLOCK]; uint64_t requires_32bit[MAXBLOCK]; @@ -126,8 +138,14 @@ struct ll_entry #else static const u_int using_tlb=0; #endif - static u_int sp_in_mirror; + int new_dynarec_did_compile; + int new_dynarec_hacks; u_int stop_after_jal; +#ifndef RAM_FIXED + static u_int ram_offset; +#else + static const u_int ram_offset=0; +#endif extern u_char restore_candidate[512]; extern int cycle_count; @@ -261,11 +279,13 @@ int tracedebug=0; //#define DEBUG_CYCLE_COUNT 1 -void nullf() {} -//#define assem_debug printf -//#define inv_debug printf -#define assem_debug nullf -#define inv_debug nullf +int cycle_multiplier; // 100 for 1.0 + +static int CLOCK_ADJUST(int x) +{ + int s=(x>>31)|1; + return (x * cycle_multiplier + s * 50) / 100; +} static void tlb_hacks() { @@ -331,6 +351,7 @@ static u_int get_page(u_int vaddr) return page; } +#ifndef PCSX static u_int get_vpage(u_int vaddr) { u_int vpage=(vaddr^0x80000000)>>12; @@ -340,6 +361,13 @@ static u_int get_vpage(u_int vaddr) if(vpage>2048) vpage=2048+(vpage&2047); return vpage; } +#else +// no virtual mem in PCSX +static u_int get_vpage(u_int vaddr) +{ + return get_page(vaddr); +} +#endif // Get address from virtual address // This is called from the recompiled JR/JALR instructions @@ -371,7 +399,10 @@ void *get_addr(u_int vaddr) if(verify_dirty(head->addr)) { //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]); invalid_code[vaddr>>12]=0; + inv_code_start=inv_code_end=~0; +#ifndef DISABLE_TLB memory_map[vaddr>>12]|=0x40000000; +#endif if(vpage<2048) { #ifndef DISABLE_TLB if(tlb_LUT_r[vaddr>>12]) { @@ -463,6 +494,7 @@ void *get_addr_32(u_int vaddr,u_int flags) if(verify_dirty(head->addr)) { //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]); invalid_code[vaddr>>12]=0; + inv_code_start=inv_code_end=~0; memory_map[vaddr>>12]|=0x40000000; if(vpage<2048) { #ifndef DISABLE_TLB @@ -576,11 +608,11 @@ void set_const(struct regstat *cur,signed char reg,uint64_t value) for (hr=0;hrregmap[hr]==reg) { cur->isconst|=1<constmap[hr]=value; + current_constmap[hr]=value; } else if((cur->regmap[hr]^64)==reg) { cur->isconst|=1<constmap[hr]=value>>32; + current_constmap[hr]=value>>32; } } } @@ -614,7 +646,7 @@ uint64_t get_const(struct regstat *cur,signed char reg) if(!reg) return 0; for (hr=0;hrregmap[hr]==reg) { - return cur->constmap[hr]; + return current_constmap[hr]; } } printf("Unknown constant in r%d\n",reg); @@ -842,7 +874,7 @@ void alloc_all(struct regstat *cur,int i) } } - +#ifndef FORCE32 void div64(int64_t dividend,int64_t divisor) { lo=dividend/divisor; @@ -953,6 +985,7 @@ uint64_t ldr_merge(uint64_t original,uint64_t loaded,u_int bits) else original=loaded; return original; } +#endif #ifdef __i386__ #include "assem_x86.c" @@ -1127,39 +1160,10 @@ void invalidate_page(u_int page) head=next; } } -void invalidate_block(u_int block) + +static void invalidate_block_range(u_int block, u_int first, u_int last) { u_int page=get_page(block<<12); - u_int vpage=get_vpage(block<<12); - inv_debug("INVALIDATE: %x (%d)\n",block<<12,page); - //inv_debug("invalid_code[block]=%d\n",invalid_code[block]); - u_int first,last; - first=last=page; - struct ll_entry *head; - head=jump_dirty[vpage]; - //printf("page=%d vpage=%d\n",page,vpage); - while(head!=NULL) { - u_int start,end; - if(vpage>2047||(head->vaddr>>12)==block) { // Ignore vaddr hash collision - get_bounds((int)head->addr,&start,&end); - //printf("start: %x end: %x\n",start,end); - if(page<2048&&start>=0x80000000&&end<0x80000000+RAM_SIZE) { - if(((start-(u_int)rdram)>>12)<=page&&((end-1-(u_int)rdram)>>12)>=page) { - if((((start-(u_int)rdram)>>12)&2047)>12)&2047; - if((((end-1-(u_int)rdram)>>12)&2047)>last) last=((end-1-(u_int)rdram)>>12)&2047; - } - } -#ifndef DISABLE_TLB - if(page<2048&&(signed int)start>=(signed int)0xC0000000&&(signed int)end>=(signed int)0xC0000000) { - if(((start+memory_map[start>>12]-(u_int)rdram)>>12)<=page&&((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)>=page) { - if((((start+memory_map[start>>12]-(u_int)rdram)>>12)&2047)>12]-(u_int)rdram)>>12)&2047; - if((((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047)>last) last=((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047; - } - } -#endif - } - head=head->next; - } //printf("first=%d last=%d\n",first,last); invalidate_page(page); assert(first+5>page); // NB: this assumes MAXBLOCK<=4096 (4 pages) @@ -1178,9 +1182,6 @@ void invalidate_block(u_int block) // Don't trap writes invalid_code[block]=1; -#ifdef PCSX - invalid_code[((u_int)0x80000000>>12)|page]=1; -#endif #ifndef DISABLE_TLB // If there is a valid TLB entry for this page, remove write protect if(tlb_LUT_w[block]) { @@ -1198,10 +1199,103 @@ void invalidate_block(u_int block) memset(mini_ht,-1,sizeof(mini_ht)); #endif } + +void invalidate_block(u_int block) +{ + u_int page=get_page(block<<12); + u_int vpage=get_vpage(block<<12); + inv_debug("INVALIDATE: %x (%d)\n",block<<12,page); + //inv_debug("invalid_code[block]=%d\n",invalid_code[block]); + u_int first,last; + first=last=page; + struct ll_entry *head; + head=jump_dirty[vpage]; + //printf("page=%d vpage=%d\n",page,vpage); + while(head!=NULL) { + u_int start,end; + if(vpage>2047||(head->vaddr>>12)==block) { // Ignore vaddr hash collision + get_bounds((int)head->addr,&start,&end); + //printf("start: %x end: %x\n",start,end); + if(page<2048&&start>=(u_int)rdram&&end<(u_int)rdram+RAM_SIZE) { + if(((start-(u_int)rdram)>>12)<=page&&((end-1-(u_int)rdram)>>12)>=page) { + if((((start-(u_int)rdram)>>12)&2047)>12)&2047; + if((((end-1-(u_int)rdram)>>12)&2047)>last) last=((end-1-(u_int)rdram)>>12)&2047; + } + } +#ifndef DISABLE_TLB + if(page<2048&&(signed int)start>=(signed int)0xC0000000&&(signed int)end>=(signed int)0xC0000000) { + if(((start+memory_map[start>>12]-(u_int)rdram)>>12)<=page&&((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)>=page) { + if((((start+memory_map[start>>12]-(u_int)rdram)>>12)&2047)>12]-(u_int)rdram)>>12)&2047; + if((((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047)>last) last=((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047; + } + } +#endif + } + head=head->next; + } + invalidate_block_range(block,first,last); +} + void invalidate_addr(u_int addr) { +#ifdef PCSX + //static int rhits; + // this check is done by the caller + //if (inv_code_start<=addr&&addr<=inv_code_end) { rhits++; return; } + u_int page=get_vpage(addr); + if(page<2048) { // RAM + struct ll_entry *head; + u_int addr_min=~0, addr_max=0; + u_int mask=RAM_SIZE-1; + u_int addr_main=0x80000000|(addr&mask); + int pg1; + inv_code_start=addr_main&~0xfff; + inv_code_end=addr_main|0xfff; + pg1=page; + if (pg1>0) { + // must check previous page too because of spans.. + pg1--; + inv_code_start-=0x1000; + } + for(;pg1<=page;pg1++) { + for(head=jump_dirty[pg1];head!=NULL;head=head->next) { + u_int start,end; + get_bounds((int)head->addr,&start,&end); + if(ram_offset) { + start-=ram_offset; + end-=ram_offset; + } + if(start<=addr_main&&addr_mainaddr_max) addr_max=end; + } + else if(addr_maininv_code_start) + inv_code_start=end; + } + } + } + if (addr_min!=~0) { + inv_debug("INV ADDR: %08x hit %08x-%08x\n", addr, addr_min, addr_max); + inv_code_start=inv_code_end=~0; + invalidate_block_range(addr>>12,(addr_min&mask)>>12,(addr_max&mask)>>12); + return; + } + else { + inv_code_start=(addr&~mask)|(inv_code_start&mask); + inv_code_end=(addr&~mask)|(inv_code_end&mask); + inv_debug("INV ADDR: %08x miss, inv %08x-%08x, sk %d\n", addr, inv_code_start, inv_code_end, 0); + return; + } + } +#endif invalidate_block(addr>>12); } + // This is called when loading a save state. // Anything could have changed, so invalidate everything. void invalidate_all_pages() @@ -1271,11 +1365,13 @@ void clean_blocks(u_int page) inv|=invalid_code[i]; } } +#ifndef DISABLE_TLB if((signed int)head->vaddr>=(signed int)0xC0000000) { u_int addr = (head->vaddr+(memory_map[head->vaddr>>12]<<2)); //printf("addr=%x start=%x end=%x\n",addr,start,end); if(addr=end) inv=1; } +#endif else if((signed int)head->vaddr>=(signed int)0x80000000+RAM_SIZE) { inv=1; } @@ -1328,8 +1424,6 @@ void mov_alloc(struct regstat *current,int i) void shiftimm_alloc(struct regstat *current,int i) { - clear_const(current,rs1[i]); - clear_const(current,rt1[i]); if(opcode2[i]<=0x3) // SLL/SRL/SRA { if(rt1[i]) { @@ -1338,8 +1432,21 @@ void shiftimm_alloc(struct regstat *current,int i) alloc_reg(current,i,rt1[i]); current->is32|=1LL<>imm[i]); + if(opcode2[i]==0x03) set_const(current,rt1[i],v>>imm[i]); + } + else clear_const(current,rt1[i]); } } + else + { + clear_const(current,rs1[i]); + clear_const(current,rt1[i]); + } + if(opcode2[i]>=0x38&&opcode2[i]<=0x3b) // DSLL/DSRL/DSRA { if(rt1[i]) { @@ -2625,7 +2732,7 @@ void shiftimm_assemble(int i,struct regstat *i_regs) t=get_reg(i_regs->regmap,rt1[i]); s=get_reg(i_regs->regmap,rs1[i]); //assert(t>=0); - if(t>=0){ + if(t>=0&&!((i_regs->isconst>>t)&1)){ if(rs1[i]==0) { emit_zeroreg(t); @@ -2819,25 +2926,13 @@ void load_assemble(int i,struct regstat *i_regs) if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE) #endif { - #ifdef PCSX - if(sp_in_mirror&&rs1[i]==29) { - emit_andimm(addr,~0x00e00000,HOST_TEMPREG); - emit_cmpimm(HOST_TEMPREG,RAM_SIZE); - fastload_reg_override=HOST_TEMPREG; - } - else - #endif - emit_cmpimm(addr,RAM_SIZE); - jaddr=(int)out; - #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK - // Hint to branch predictor that the branch is unlikely to be taken - if(rs1[i]>=28) - emit_jno_unlikely(0); - else - #endif - emit_jno(0); + jaddr=emit_fastpath_cmp_jump(i,addr,&fastload_reg_override); } } + else if(ram_offset&&memtarget) { + emit_addimm(addr,ram_offset,HOST_TEMPREG); + fastload_reg_override=HOST_TEMPREG; + } }else{ // using tlb int x=0; if (opcode[i]==0x20||opcode[i]==0x24) x=3; // LB/LBU @@ -2903,7 +2998,7 @@ void load_assemble(int i,struct regstat *i_regs) gen_tlb_addr_r(a,map); emit_movswl_indexed(x,a,tl); }else{ - #ifdef RAM_OFFSET + #if 1 //def RAM_OFFSET emit_movswl_indexed(x,a,tl); #else emit_movswl_indexed((int)rdram-0x80000000+x,a,tl); @@ -2990,7 +3085,7 @@ void load_assemble(int i,struct regstat *i_regs) gen_tlb_addr_r(a,map); emit_movzwl_indexed(x,a,tl); }else{ - #ifdef RAM_OFFSET + #if 1 //def RAM_OFFSET emit_movzwl_indexed(x,a,tl); #else emit_movzwl_indexed((int)rdram-0x80000000+x,a,tl); @@ -3120,14 +3215,7 @@ void store_assemble(int i,struct regstat *i_regs) else addr=s; if(!using_tlb) { if(!c) { - #ifdef PCSX - if(sp_in_mirror&&rs1[i]==29) { - emit_andimm(addr,~0x00e00000,HOST_TEMPREG); - emit_cmpimm(HOST_TEMPREG,RAM_SIZE); - faststore_reg_override=HOST_TEMPREG; - } - else - #endif + #ifndef PCSX #ifdef R29_HACK // Strmnnrmn's speed hack if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE) @@ -3150,6 +3238,13 @@ void store_assemble(int i,struct regstat *i_regs) #endif emit_jno(0); } + #else + jaddr=emit_fastpath_cmp_jump(i,addr,&faststore_reg_override); + #endif + } + else if(ram_offset&&memtarget) { + emit_addimm(addr,ram_offset,HOST_TEMPREG); + faststore_reg_override=HOST_TEMPREG; } }else{ // using tlb int x=0; @@ -3195,7 +3290,8 @@ void store_assemble(int i,struct regstat *i_regs) gen_tlb_addr_w(a,map); emit_writehword_indexed(tl,x,a); }else - emit_writehword_indexed(tl,(int)rdram-0x80000000+x,a); + //emit_writehword_indexed(tl,(int)rdram-0x80000000+x,a); + emit_writehword_indexed(tl,x,a); } type=STOREH_STUB; } @@ -3226,7 +3322,15 @@ void store_assemble(int i,struct regstat *i_regs) } type=STORED_STUB; } - if(!using_tlb) { +#ifdef PCSX + if(jaddr) { + // PCSX store handlers don't check invcode again + reglist|=1<waswritten&(1<regmap,rs2[i],ccadj[i],reglist); + inline_writestub(type,i,addr_val,i_regs->regmap,rs2[i],ccadj[i],reglist); + } + // basic current block modification detection.. + // not looking back as that should be in mips cache already + if(c&&start+i*4regmap==regs[i].regmap); // not delay slot + if(i_regs->regmap==regs[i].regmap) { + load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i); + wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty); + emit_movimm(start+i*4+4,0); + emit_writeword(0,(int)&pcaddr); + emit_jmp((int)do_interrupt); + } } //if(opcode[i]==0x2B || opcode[i]==0x3F) //if(opcode[i]==0x2B || opcode[i]==0x28) @@ -3500,7 +3618,7 @@ void storelr_assemble(int i,struct regstat *i_regs) } if(!c||!memtarget) add_stub(STORELR_STUB,jaddr,(int)out,i,(int)i_regs,temp,ccadj[i],reglist); - if(!using_tlb) { + if(!using_tlb&&!(i_regs->waswritten&(1<regmap,ROREG); if(map<0) map=HOST_TEMPREG; @@ -3678,7 +3796,7 @@ void c1ls_assemble(int i,struct regstat *i_regs) emit_writedword_indexed_tlb(th,tl,0,offset||c||s<0?temp:s,map,temp); type=STORED_STUB; } - if(!using_tlb) { + if(!using_tlb&&!(i_regs->waswritten&(1<>16)&0x1f; s=get_reg(i_regs->regmap,rs1[i]); @@ -3774,27 +3893,34 @@ void c2ls_assemble(int i,struct regstat *i_regs) } else { if(!c) { - emit_cmpimm(offset||c||s<0?ar:s,RAM_SIZE); - jaddr2=(int)out; - emit_jno(0); + jaddr2=emit_fastpath_cmp_jump(i,ar,&fastio_reg_override); + } + else if(ram_offset&&memtarget) { + emit_addimm(ar,ram_offset,HOST_TEMPREG); + fastio_reg_override=HOST_TEMPREG; } if (opcode[i]==0x32) { // LWC2 #ifdef HOST_IMM_ADDR32 if(c) emit_readword_tlb(constmap[i][s]+offset,-1,tl); else #endif - emit_readword_indexed(0,ar,tl); + int a=ar; + if(fastio_reg_override) a=fastio_reg_override; + emit_readword_indexed(0,a,tl); } if (opcode[i]==0x3a) { // SWC2 #ifdef DESTRUCTIVE_SHIFT if(!offset&&!c&&s>=0) emit_mov(s,ar); #endif - emit_writeword_indexed(tl,0,ar); + int a=ar; + if(fastio_reg_override) a=fastio_reg_override; + emit_writeword_indexed(tl,0,a); } } if(jaddr2) add_stub(type,jaddr2,(int)out,i,ar,(int)i_regs,ccadj[i],reglist); - if (opcode[i]==0x3a) { // SWC2 + if(opcode[i]==0x3a) // SWC2 + if(!(i_regs->waswritten&(1<regmap,INVCP); assert(ir>=0); @@ -3867,7 +3993,7 @@ void syscall_assemble(int i,struct regstat *i_regs) assert(ccreg==HOST_CCREG); assert(!is_delayslot); emit_movimm(start+i*4,EAX); // Get PC - emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle... + emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle... emit_jmp((int)jump_syscall_hle); // XXX } @@ -3878,7 +4004,7 @@ void hlecall_assemble(int i,struct regstat *i_regs) assert(!is_delayslot); emit_movimm(start+i*4+4,0); // Get PC emit_movimm((int)psxHLEt[source[i]&7],1); - emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // XXX + emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // XXX emit_jmp((int)jump_hlecall); } @@ -3888,12 +4014,13 @@ void intcall_assemble(int i,struct regstat *i_regs) assert(ccreg==HOST_CCREG); assert(!is_delayslot); emit_movimm(start+i*4,0); // Get PC - emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); + emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); emit_jmp((int)jump_intcall); } void ds_assemble(int i,struct regstat *i_regs) { + speculate_register_values(i); is_delayslot=1; switch(itype[i]) { case ALU: @@ -4153,6 +4280,7 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) // printf("poor load scheduling!\n"); } else if(c) { +#ifndef DISABLE_TLB if(rm>=0) { if(!entry||entry[rm]!=mgr) { if(itype[i]==STORE||itype[i]==STORELR||(opcode[i]&0x3b)==0x39||(opcode[i]&0x3b)==0x3a) { @@ -4167,6 +4295,7 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) } } } +#endif if(rs1[i]!=rt1[i]||itype[i]!=LOAD) { if(!entry||entry[ra]!=agr) { if (opcode[i]==0x22||opcode[i]==0x26) { @@ -4179,6 +4308,7 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) (using_tlb&&((signed int)constmap[i][rs]+offset)>=(signed int)0xC0000000)) #endif emit_movimm(constmap[i][rs]+offset,ra); + regs[i].loadedconst|=1<regmap,agr); @@ -4239,6 +4369,7 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) (using_tlb&&((signed int)constmap[i+1][rs]+offset)>=(signed int)0xC0000000)) #endif emit_movimm(constmap[i+1][rs]+offset,ra); + regs[i+1].loadedconst|=1<=0&&((regs[i-1].isconst>>hr)&1)&&pre[hr]==regmap[hr] + &®map[hr]==regs[i-1].regmap[hr]&&((regs[i-1].loadedconst>>hr)&1)) + { + regs[i].loadedconst|=1<=0) { //if(entry[hr]!=regmap[hr]) { - if(i==0||!((regs[i-1].isconst>>hr)&1)||pre[hr]!=regmap[hr]||bt[i]) { + if(!((regs[i].loadedconst>>hr)&1)) { if(((regs[i].isconst>>hr)&1)&®map[hr]<64&®map[hr]>0) { - int value; + int value,similar=0; if(get_final_value(hr,i,&value)) { - if(value==0) { + // see if some other register has similar value + for(hr2=0;hr2>hr2)&1)) { + if(is_similar_value(value,constmap[i][hr2])) { + similar=1; + break; + } + } + } + if(similar) { + int value2; + if(get_final_value(hr2,i,&value2)) // is this needed? + emit_movimm_from(value2,hr2,value,hr); + else + emit_movimm(value,hr); + } + else if(value==0) { emit_zeroreg(hr); } else { emit_movimm(value,hr); } } + regs[i].loadedconst|=1<>2].regmap_entry); @@ -5210,7 +5370,7 @@ void ujump_assemble(int i,struct regstat *i_regs) if(rt1[i]==31&&temp>=0) emit_prefetchreg(temp); #endif do_cc(i,branch_regs[i].regmap,&adj,ba[i],TAKEN,0); - if(adj) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc); + if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); if(internal_branch(branch_regs[i].is32,ba[i])) assem_debug("branch: internal\n"); @@ -5323,8 +5483,14 @@ void rjump_assemble(int i,struct regstat *i_regs) //do_cc(i,branch_regs[i].regmap,&adj,-1,TAKEN); //if(adj) emit_addimm(cc,2*(ccadj[i]+2-adj),cc); // ??? - Shouldn't happen //assert(adj==0); - emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),HOST_CCREG); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); add_stub(CC_STUB,(int)out,jump_vaddr_reg[rs],0,i,-1,TAKEN,0); +#ifdef PCSX + if(itype[i+1]==COP0&&(source[i+1]&0x3f)==0x10) + // special case for RFE + emit_jmp(0); + else +#endif emit_jns(0); //load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,-1); #ifdef USE_MINI_HT @@ -5454,7 +5620,7 @@ void cjump_assemble(int i,struct regstat *i_regs) if(unconditional) { do_cc(i,branch_regs[i].regmap,&adj,ba[i],TAKEN,0); if(i!=(ba[i]-start)>>2 || source[i+1]!=0) { - if(adj) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc); + if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); if(internal) assem_debug("branch: internal\n"); @@ -5473,7 +5639,7 @@ void cjump_assemble(int i,struct regstat *i_regs) } } else if(nop) { - emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),cc); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); int jaddr=(int)out; emit_jns(0); add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0); @@ -5481,7 +5647,7 @@ void cjump_assemble(int i,struct regstat *i_regs) else { int taken=0,nottaken=0,nottaken1=0; do_cc(i,branch_regs[i].regmap,&adj,-1,0,invert); - if(adj&&!invert) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc); + if(adj&&!invert) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); if(!only32) { assert(s1h>=0); @@ -5573,7 +5739,7 @@ void cjump_assemble(int i,struct regstat *i_regs) #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK if(match&&(!internal||!is_ds[(ba[i]-start)>>2])) { if(adj) { - emit_addimm(cc,-CLOCK_DIVIDER*adj,cc); + emit_addimm(cc,-CLOCK_ADJUST(adj),cc); add_to_linker((int)out,ba[i],internal); }else{ emit_addnop(13); @@ -5583,7 +5749,7 @@ void cjump_assemble(int i,struct regstat *i_regs) }else #endif { - if(adj) emit_addimm(cc,-CLOCK_DIVIDER*adj,cc); + if(adj) emit_addimm(cc,-CLOCK_ADJUST(adj),cc); store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); if(internal) @@ -5603,7 +5769,7 @@ void cjump_assemble(int i,struct regstat *i_regs) if(nottaken1) set_jump_target(nottaken1,(int)out); if(adj) { - if(!invert) emit_addimm(cc,CLOCK_DIVIDER*adj,cc); + if(!invert) emit_addimm(cc,CLOCK_ADJUST(adj),cc); } } // (!unconditional) } // if(ooo) @@ -5707,7 +5873,7 @@ void cjump_assemble(int i,struct regstat *i_regs) store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); do_cc(i,i_regmap,&adj,ba[i],TAKEN,0); assem_debug("cycle count (adj)\n"); - if(adj) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc); + if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); if(internal) assem_debug("branch: internal\n"); @@ -5739,7 +5905,7 @@ void cjump_assemble(int i,struct regstat *i_regs) if(cc==-1&&!likely[i]) { // Cycle count isn't in a register, temporarily load it then write it out emit_loadreg(CCREG,HOST_CCREG); - emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),HOST_CCREG); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); int jaddr=(int)out; emit_jns(0); add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0); @@ -5748,7 +5914,7 @@ void cjump_assemble(int i,struct regstat *i_regs) else{ cc=get_reg(i_regmap,CCREG); assert(cc==HOST_CCREG); - emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),cc); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); int jaddr=(int)out; emit_jns(0); add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,likely[i]?NULLDS:NOTTAKEN,0); @@ -5839,7 +6005,7 @@ void sjump_assemble(int i,struct regstat *i_regs) if(unconditional) { do_cc(i,branch_regs[i].regmap,&adj,ba[i],TAKEN,0); if(i!=(ba[i]-start)>>2 || source[i+1]!=0) { - if(adj) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc); + if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); if(internal) assem_debug("branch: internal\n"); @@ -5858,7 +6024,7 @@ void sjump_assemble(int i,struct regstat *i_regs) } } else if(nevertaken) { - emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),cc); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); int jaddr=(int)out; emit_jns(0); add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0); @@ -5866,7 +6032,7 @@ void sjump_assemble(int i,struct regstat *i_regs) else { int nottaken=0; do_cc(i,branch_regs[i].regmap,&adj,-1,0,invert); - if(adj&&!invert) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc); + if(adj&&!invert) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); if(!only32) { assert(s1h>=0); @@ -5924,7 +6090,7 @@ void sjump_assemble(int i,struct regstat *i_regs) #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK if(match&&(!internal||!is_ds[(ba[i]-start)>>2])) { if(adj) { - emit_addimm(cc,-CLOCK_DIVIDER*adj,cc); + emit_addimm(cc,-CLOCK_ADJUST(adj),cc); add_to_linker((int)out,ba[i],internal); }else{ emit_addnop(13); @@ -5934,7 +6100,7 @@ void sjump_assemble(int i,struct regstat *i_regs) }else #endif { - if(adj) emit_addimm(cc,-CLOCK_DIVIDER*adj,cc); + if(adj) emit_addimm(cc,-CLOCK_ADJUST(adj),cc); store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); if(internal) @@ -5953,7 +6119,7 @@ void sjump_assemble(int i,struct regstat *i_regs) } if(adj) { - if(!invert) emit_addimm(cc,CLOCK_DIVIDER*adj,cc); + if(!invert) emit_addimm(cc,CLOCK_ADJUST(adj),cc); } } // (!unconditional) } // if(ooo) @@ -6036,7 +6202,7 @@ void sjump_assemble(int i,struct regstat *i_regs) store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); do_cc(i,i_regmap,&adj,ba[i],TAKEN,0); assem_debug("cycle count (adj)\n"); - if(adj) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc); + if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); if(internal) assem_debug("branch: internal\n"); @@ -6067,7 +6233,7 @@ void sjump_assemble(int i,struct regstat *i_regs) if(cc==-1&&!likely[i]) { // Cycle count isn't in a register, temporarily load it then write it out emit_loadreg(CCREG,HOST_CCREG); - emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),HOST_CCREG); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); int jaddr=(int)out; emit_jns(0); add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0); @@ -6076,7 +6242,7 @@ void sjump_assemble(int i,struct regstat *i_regs) else{ cc=get_reg(i_regmap,CCREG); assert(cc==HOST_CCREG); - emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),cc); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); int jaddr=(int)out; emit_jns(0); add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,likely[i]?NULLDS:NOTTAKEN,0); @@ -6142,7 +6308,7 @@ void fjump_assemble(int i,struct regstat *i_regs) assem_debug("cycle count (adj)\n"); if(1) { int nottaken=0; - if(adj&&!invert) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc); + if(adj&&!invert) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); if(1) { assert(fs>=0); emit_testimm(fs,0x800000); @@ -6169,7 +6335,7 @@ void fjump_assemble(int i,struct regstat *i_regs) } // if(!only32) if(invert) { - if(adj) emit_addimm(cc,-CLOCK_DIVIDER*adj,cc); + if(adj) emit_addimm(cc,-CLOCK_ADJUST(adj),cc); #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK else if(match) emit_addnop(13); #endif @@ -6190,7 +6356,7 @@ void fjump_assemble(int i,struct regstat *i_regs) } if(adj) { - if(!invert) emit_addimm(cc,CLOCK_DIVIDER*adj,cc); + if(!invert) emit_addimm(cc,CLOCK_ADJUST(adj),cc); } } // (!unconditional) } // if(ooo) @@ -6242,7 +6408,7 @@ void fjump_assemble(int i,struct regstat *i_regs) store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); do_cc(i,i_regmap,&adj,ba[i],TAKEN,0); assem_debug("cycle count (adj)\n"); - if(adj) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc); + if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); if(internal) assem_debug("branch: internal\n"); @@ -6272,7 +6438,7 @@ void fjump_assemble(int i,struct regstat *i_regs) if(cc==-1&&!likely[i]) { // Cycle count isn't in a register, temporarily load it then write it out emit_loadreg(CCREG,HOST_CCREG); - emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),HOST_CCREG); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); int jaddr=(int)out; emit_jns(0); add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0); @@ -6281,7 +6447,7 @@ void fjump_assemble(int i,struct regstat *i_regs) else{ cc=get_reg(i_regmap,CCREG); assert(cc==HOST_CCREG); - emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),cc); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); int jaddr=(int)out; emit_jns(0); add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,likely[i]?NULLDS:NOTTAKEN,0); @@ -6354,7 +6520,7 @@ static void pagespan_assemble(int i,struct regstat *i_regs) if((opcode[i]&0x2e)==4||opcode[i]==0x11) { // BEQ/BNE/BEQL/BNEL/BC1 load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,CCREG,CCREG); } - emit_addimm(HOST_CCREG,CLOCK_DIVIDER*(ccadj[i]+2),HOST_CCREG); + emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); if(opcode[i]==2) // J { unconditional=1; @@ -6652,16 +6818,22 @@ static void pagespan_ds() void unneeded_registers(int istart,int iend,int r) { int i; - uint64_t u,uu,b,bu; - uint64_t temp_u,temp_uu; + uint64_t u,uu,gte_u,b,bu,gte_bu; + uint64_t temp_u,temp_uu,temp_gte_u=0; uint64_t tdep; + uint64_t gte_u_unknown=0; + if(new_dynarec_hacks&NDHACK_GTE_UNNEEDED) + gte_u_unknown=~0ll; if(iend==slen-1) { u=1;uu=1; + gte_u=gte_u_unknown; }else{ u=unneeded_reg[iend+1]; uu=unneeded_reg_upper[iend+1]; u=1;uu=1; + gte_u=gte_unneeded[iend+1]; } + for (i=iend;i>=istart;i--) { //printf("unneeded registers i=%d (%d,%d) r=%d\n",i,istart,iend,r); @@ -6675,6 +6847,7 @@ void unneeded_registers(int istart,int iend,int r) // Branch out of this block, flush all regs u=1; uu=1; + gte_u=gte_u_unknown; /* Hexagon hack if(itype[i]==UJUMP&&rt1[i]==31) { @@ -6706,17 +6879,21 @@ void unneeded_registers(int istart,int iend,int r) uu&=~((1LL<>rt1[i+1])&1; @@ -6743,17 +6922,21 @@ void unneeded_registers(int istart,int iend,int r) temp_uu&=~((1LL<>rt1[i])&1; @@ -6763,8 +6946,11 @@ void unneeded_registers(int istart,int iend,int r) temp_uu&=~((1LL<>2]=1; unneeded_reg_upper[(ba[i]-start)>>2]=1; + gte_unneeded[(ba[i]-start)>>2]=gte_u_unknown; } } /*else*/ if(1) { if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000) @@ -6779,6 +6966,7 @@ void unneeded_registers(int istart,int iend,int r) // Unconditional branch u=unneeded_reg[(ba[i]-start)>>2]; uu=unneeded_reg_upper[(ba[i]-start)>>2]; + gte_u=gte_unneeded[(ba[i]-start)>>2]; branch_unneeded_reg[i]=u; branch_unneeded_reg_upper[i]=uu; //u=1; @@ -6793,10 +6981,13 @@ void unneeded_registers(int istart,int iend,int r) uu&=~((1LL<>2]; bu=unneeded_reg_upper[(ba[i]-start)>>2]; + gte_bu=gte_unneeded[(ba[i]-start)>>2]; branch_unneeded_reg[i]=b; branch_unneeded_reg_upper[i]=bu; //b=1; @@ -6811,20 +7002,25 @@ void unneeded_registers(int istart,int iend,int r) bu&=~((1LL<>2; for(n=526336;n<1048576;n++) // 0x80800000 .. 0xFFFFFFFF memory_map[n]=-1; +#endif for(n=0;n<4096;n++) ll_clear(jump_in+n); for(n=0;n<4096;n++) ll_clear(jump_out+n); for(n=0;n<4096;n++) ll_clear(jump_dirty+n); @@ -7813,10 +8018,16 @@ void new_dynarec_init() { printf("Init new dynarec\n"); out=(u_char *)BASE_ADDR; +#if BASE_ADDR_FIXED if (mmap (out, 1<0x80200000&& - 0x10000<=psxRegs.GPR.n.sp&&(psxRegs.GPR.n.sp&~0xe0e00000)>21)&0x1f; - if (source[i]&0x3f) { + //if (op2 & 0x10) { + if (source[i]&0x3f) { // use this hack to support old savestates with patched gte insns if (gte_handlers[source[i]&0x3f]!=NULL) { - snprintf(insn[i], sizeof(insn[i]), "COP2 %x", source[i]&0x3f); + if (gte_regnames[source[i]&0x3f]!=NULL) + strcpy(insn[i],gte_regnames[source[i]&0x3f]); + else + snprintf(insn[i], sizeof(insn[i]), "COP2 %x", source[i]&0x3f); type=C2OP; } } @@ -8316,6 +8533,7 @@ int new_recompile_block(int addr) us2[i]=0; dep1[i]=0; dep2[i]=0; + gte_rs[i]=gte_rt[i]=0; switch(type) { case LOAD: rs1[i]=(source[i]>>21)&0x1f; @@ -8479,7 +8697,6 @@ int new_recompile_block(int addr) if(op2==16) if((source[i]&0x3f)==0x18) rs2[i]=CCREG; // ERET break; case COP1: - case COP2: rs1[i]=0; rs2[i]=0; rt1[i]=0; @@ -8489,6 +8706,23 @@ int new_recompile_block(int addr) if(op2==5) us1[i]=rs1[i]; // DMTC1 rs2[i]=CSREG; break; + case COP2: + rs1[i]=0; + rs2[i]=0; + rt1[i]=0; + rt2[i]=0; + if(op2<3) rt1[i]=(source[i]>>16)&0x1F; // MFC2/CFC2 + if(op2>3) rs1[i]=(source[i]>>16)&0x1F; // MTC2/CTC2 + rs2[i]=CSREG; + int gr=(source[i]>>11)&0x1F; + switch(op2) + { + case 0x00: gte_rs[i]=1ll<>21)&0x1F; rs2[i]=CSREG; @@ -8502,6 +8736,23 @@ int new_recompile_block(int addr) rt1[i]=0; rt2[i]=0; imm[i]=(short)source[i]; + if(op==0x32) gte_rt[i]=1ll<<((source[i]>>16)&0x1F); // LWC2 + else gte_rs[i]=1ll<<((source[i]>>16)&0x1F); // SWC2 + break; + case C2OP: + rs1[i]=0; + rs2[i]=0; + rt1[i]=0; + rt2[i]=0; + gte_rs[i]=gte_reg_reads[source[i]&0x3f]; + gte_rt[i]=gte_reg_writes[source[i]&0x3f]; + gte_rt[i]|=1ll<<63; // every op changes flags + if((source[i]&0x3f)==GTE_MVMVA) { + int v = (source[i] >> 15) & 3; + gte_rs[i]&=~0xe3fll; + if(v==3) gte_rs[i]|=0xe00ll; + else gte_rs[i]|=3ll<<(v*2); + } break; case FLOAT: case FCONV: @@ -8635,6 +8886,7 @@ int new_recompile_block(int addr) dirty_reg(¤t,CCREG); current.isconst=0; current.wasconst=0; + current.waswritten=0; int ds=0; int cc=0; int hr=-1; @@ -8663,6 +8915,7 @@ int new_recompile_block(int addr) if(current.regmap[hr]==0) current.regmap[hr]=-1; } current.isconst=0; + current.waswritten=0; } if(i>1) { @@ -8727,6 +8980,7 @@ int new_recompile_block(int addr) regs[i].wasconst=current.isconst; regs[i].was32=current.is32; regs[i].wasdirty=current.dirty; + regs[i].loadedconst=0; #if defined(DESTRUCTIVE_WRITEBACK) && !defined(FORCE32) // To change a dirty register from 32 to 64 bits, we must write // it out during the previous cycle (for branches, 2 cycles) @@ -9289,6 +9543,14 @@ int new_recompile_block(int addr) } memcpy(regs[i].regmap,current.regmap,sizeof(current.regmap)); } + + if(i>0&&(itype[i-1]==STORE||itype[i-1]==STORELR||(itype[i-1]==C2LS&&opcode[i-1]==0x3a))&&(u_int)imm[i-1]<0x800) + current.waswritten|=1<=0x800) + current.waswritten&=~(1<0) { @@ -9309,7 +9571,7 @@ int new_recompile_block(int addr) branch_regs[i-1].is32|=1LL<<31; } memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap)); - memcpy(constmap[i],constmap[i-1],sizeof(current.constmap)); + memcpy(constmap[i],constmap[i-1],sizeof(current_constmap)); break; case RJUMP: memcpy(&branch_regs[i-1],¤t,sizeof(current)); @@ -9334,7 +9596,7 @@ int new_recompile_block(int addr) } #endif memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap)); - memcpy(constmap[i],constmap[i-1],sizeof(current.constmap)); + memcpy(constmap[i],constmap[i-1],sizeof(current_constmap)); break; case CJUMP: if((opcode[i-1]&0x3E)==4) // BEQ/BNE @@ -9370,7 +9632,7 @@ int new_recompile_block(int addr) branch_regs[i-1].isconst=0; branch_regs[i-1].wasconst=0; memcpy(&branch_regs[i-1].regmap_entry,¤t.regmap,sizeof(current.regmap)); - memcpy(constmap[i],constmap[i-1],sizeof(current.constmap)); + memcpy(constmap[i],constmap[i-1],sizeof(current_constmap)); } else if((opcode[i-1]&0x3E)==6) // BLEZ/BGTZ @@ -9403,7 +9665,7 @@ int new_recompile_block(int addr) branch_regs[i-1].isconst=0; branch_regs[i-1].wasconst=0; memcpy(&branch_regs[i-1].regmap_entry,¤t.regmap,sizeof(current.regmap)); - memcpy(constmap[i],constmap[i-1],sizeof(current.constmap)); + memcpy(constmap[i],constmap[i-1],sizeof(current_constmap)); } else // Alloc the delay slot in case the branch is taken @@ -9469,7 +9731,7 @@ int new_recompile_block(int addr) branch_regs[i-1].isconst=0; branch_regs[i-1].wasconst=0; memcpy(&branch_regs[i-1].regmap_entry,¤t.regmap,sizeof(current.regmap)); - memcpy(constmap[i],constmap[i-1],sizeof(current.constmap)); + memcpy(constmap[i],constmap[i-1],sizeof(current_constmap)); } else // Alloc the delay slot in case the branch is taken @@ -9583,7 +9845,12 @@ int new_recompile_block(int addr) { cc=0; } -#ifdef PCSX +#if defined(PCSX) && !defined(DRC_DBG) + else if(itype[i]==C2OP&>e_cycletab[source[i]&0x3f]>2) + { + // GTE runs in parallel until accessed, divide by 2 for a rough guess + cc+=gte_cycletab[source[i]&0x3f]/2; + } else if(/*itype[i]==LOAD||*/itype[i]==STORE||itype[i]==C1LS) // load causes weird timing issues { cc+=2; // 2 cycle penalty (after CLOCK_DIVIDER) @@ -9603,7 +9870,7 @@ int new_recompile_block(int addr) regs[i].is32=current.is32; regs[i].dirty=current.dirty; regs[i].isconst=current.isconst; - memcpy(constmap[i],current.constmap,sizeof(current.constmap)); + memcpy(constmap[i],current_constmap,sizeof(current_constmap)); } for(hr=0;hr=0) { @@ -9613,6 +9880,7 @@ int new_recompile_block(int addr) } } if(current.regmap[HOST_BTREG]==BTREG) current.regmap[HOST_BTREG]=-1; + regs[i].waswritten=current.waswritten; } /* Pass 4 - Cull unused host registers */ @@ -10757,9 +11025,9 @@ int new_recompile_block(int addr) if(itype[slen-1]==SPAN) { bt[slen-1]=1; // Mark as a branch target so instruction can restart after exception } - + +#ifdef DISASM /* Debug/disassembly */ - if((void*)assem_debug==(void*)printf) for(i=0;i>16)!=0x1000)) { @@ -11147,7 +11417,7 @@ int new_recompile_block(int addr) store_regs_bt(regs[i-1].regmap,regs[i-1].is32,regs[i-1].dirty,start+i*4); if(regs[i-1].regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); - emit_addimm(HOST_CCREG,CLOCK_DIVIDER*(ccadj[i-1]+1),HOST_CCREG); + emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i-1]+1),HOST_CCREG); } else if(!likely[i-2]) { @@ -11170,7 +11440,7 @@ int new_recompile_block(int addr) store_regs_bt(regs[i-1].regmap,regs[i-1].is32,regs[i-1].dirty,start+i*4); if(regs[i-1].regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); - emit_addimm(HOST_CCREG,CLOCK_DIVIDER*(ccadj[i-1]+1),HOST_CCREG); + emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i-1]+1),HOST_CCREG); add_to_linker((int)out,start+i*4,0); emit_jmp(0); } @@ -11309,7 +11579,7 @@ int new_recompile_block(int addr) // If we're within 256K of the end of the buffer, // start over from the beginning. (Is 256K enough?) - if((int)out>BASE_ADDR+(1<(u_int)BASE_ADDR+(1<>12;i<=(start+slen*4)>>12;i++) { @@ -11325,20 +11595,23 @@ int new_recompile_block(int addr) } #endif } + inv_code_start=inv_code_end=~0; #ifdef PCSX - // PCSX maps all RAM mirror invalid_code tests to 0x80000000..0x80000000+RAM_SIZE + // for PCSX we need to mark all mirrors too if(get_page(start)<(RAM_SIZE>>12)) for(i=start>>12;i<=(start+slen*4)>>12;i++) - invalid_code[((u_int)0x80000000>>12)|i]=0; + invalid_code[((u_int)0x00000000>>12)|(i&0x1ff)]= + invalid_code[((u_int)0x80000000>>12)|(i&0x1ff)]= + invalid_code[((u_int)0xa0000000>>12)|(i&0x1ff)]=0; #endif /* Pass 10 - Free memory by expiring oldest blocks */ - int end=((((int)out-BASE_ADDR)>>(TARGET_SIZE_2-16))+16384)&65535; + int end=((((int)out-(int)BASE_ADDR)>>(TARGET_SIZE_2-16))+16384)&65535; while(expirep!=end) { int shift=TARGET_SIZE_2-3; // Divide into 8 blocks - int base=BASE_ADDR+((expirep>>13)<>13)<>11)&3) {