X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=blobdiff_plain;f=libpcsxcore%2Fnew_dynarec%2Fnew_dynarec.c;h=b2eb21be491762c037baffe1c3eb36e09e3b852e;hp=893f258c774246a261576ad4283fd4c4dded3466;hb=0ff8c62ced8c9a920ac208c6d965b138c5c124dd;hpb=b1570849e812cc98973c2efc0bee1e494fa39367 diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 893f258c..b2eb21be 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -21,10 +21,15 @@ #include #include //include for uint64_t #include +#include #include "emu_if.h" //emulator interface -#include +//#define DISASM +//#define assem_debug printf +//#define inv_debug printf +#define assem_debug(...) +#define inv_debug(...) #ifdef __i386__ #include "assem_x86.h" @@ -38,7 +43,9 @@ #define MAXBLOCK 4096 #define MAX_OUTPUT_BLOCK_SIZE 262144 -#define CLOCK_DIVIDER 2 + +int cycle_multiplier; // 100 for 1.0 +#define CLOCK_ADJUST(x) (((x) * cycle_multiplier + 50) / 100) struct regstat { @@ -52,6 +59,8 @@ struct regstat uint64_t uu; u_int wasconst; u_int isconst; + u_int loadedconst; // host regs that have constants loaded + u_int waswritten; // MIPS regs that were used as store base before uint64_t constmap[HOST_REGS]; }; @@ -80,6 +89,14 @@ struct ll_entry u_char dep1[MAXBLOCK]; u_char dep2[MAXBLOCK]; u_char lt1[MAXBLOCK]; + static uint64_t gte_rs[MAXBLOCK]; // gte: 32 data and 32 ctl regs + static uint64_t gte_rt[MAXBLOCK]; + static uint64_t gte_unneeded[MAXBLOCK]; + static u_int smrv[32]; // speculated MIPS register values + static u_int smrv_strong; // mask or regs that are likely to have correct values + static u_int smrv_weak; // same, but somewhat less likely + static u_int smrv_strong_next; // same, but after current insn executes + static u_int smrv_weak_next; int imm[MAXBLOCK]; u_int ba[MAXBLOCK]; char likely[MAXBLOCK]; @@ -126,7 +143,8 @@ struct ll_entry #else static const u_int using_tlb=0; #endif - static u_int sp_in_mirror; + int new_dynarec_did_compile; + int new_dynarec_hacks; u_int stop_after_jal; extern u_char restore_candidate[512]; extern int cycle_count; @@ -261,12 +279,6 @@ int tracedebug=0; //#define DEBUG_CYCLE_COUNT 1 -void nullf() {} -//#define assem_debug printf -//#define inv_debug printf -#define assem_debug nullf -#define inv_debug nullf - static void tlb_hacks() { #ifndef DISABLE_TLB @@ -371,7 +383,10 @@ void *get_addr(u_int vaddr) if(verify_dirty(head->addr)) { //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]); invalid_code[vaddr>>12]=0; + inv_code_start=inv_code_end=~0; +#ifndef DISABLE_TLB memory_map[vaddr>>12]|=0x40000000; +#endif if(vpage<2048) { #ifndef DISABLE_TLB if(tlb_LUT_r[vaddr>>12]) { @@ -463,6 +478,7 @@ void *get_addr_32(u_int vaddr,u_int flags) if(verify_dirty(head->addr)) { //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]); invalid_code[vaddr>>12]=0; + inv_code_start=inv_code_end=~0; memory_map[vaddr>>12]|=0x40000000; if(vpage<2048) { #ifndef DISABLE_TLB @@ -842,7 +858,7 @@ void alloc_all(struct regstat *cur,int i) } } - +#ifndef FORCE32 void div64(int64_t dividend,int64_t divisor) { lo=dividend/divisor; @@ -953,6 +969,7 @@ uint64_t ldr_merge(uint64_t original,uint64_t loaded,u_int bits) else original=loaded; return original; } +#endif #ifdef __i386__ #include "assem_x86.c" @@ -1127,39 +1144,10 @@ void invalidate_page(u_int page) head=next; } } -void invalidate_block(u_int block) + +static void invalidate_block_range(u_int block, u_int first, u_int last) { u_int page=get_page(block<<12); - u_int vpage=get_vpage(block<<12); - inv_debug("INVALIDATE: %x (%d)\n",block<<12,page); - //inv_debug("invalid_code[block]=%d\n",invalid_code[block]); - u_int first,last; - first=last=page; - struct ll_entry *head; - head=jump_dirty[vpage]; - //printf("page=%d vpage=%d\n",page,vpage); - while(head!=NULL) { - u_int start,end; - if(vpage>2047||(head->vaddr>>12)==block) { // Ignore vaddr hash collision - get_bounds((int)head->addr,&start,&end); - //printf("start: %x end: %x\n",start,end); - if(page<2048&&start>=0x80000000&&end<0x80000000+RAM_SIZE) { - if(((start-(u_int)rdram)>>12)<=page&&((end-1-(u_int)rdram)>>12)>=page) { - if((((start-(u_int)rdram)>>12)&2047)>12)&2047; - if((((end-1-(u_int)rdram)>>12)&2047)>last) last=((end-1-(u_int)rdram)>>12)&2047; - } - } -#ifndef DISABLE_TLB - if(page<2048&&(signed int)start>=(signed int)0xC0000000&&(signed int)end>=(signed int)0xC0000000) { - if(((start+memory_map[start>>12]-(u_int)rdram)>>12)<=page&&((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)>=page) { - if((((start+memory_map[start>>12]-(u_int)rdram)>>12)&2047)>12]-(u_int)rdram)>>12)&2047; - if((((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047)>last) last=((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047; - } - } -#endif - } - head=head->next; - } //printf("first=%d last=%d\n",first,last); invalidate_page(page); assert(first+5>page); // NB: this assumes MAXBLOCK<=4096 (4 pages) @@ -1178,9 +1166,6 @@ void invalidate_block(u_int block) // Don't trap writes invalid_code[block]=1; -#ifdef PCSX - invalid_code[((u_int)0x80000000>>12)|page]=1; -#endif #ifndef DISABLE_TLB // If there is a valid TLB entry for this page, remove write protect if(tlb_LUT_w[block]) { @@ -1198,10 +1183,98 @@ void invalidate_block(u_int block) memset(mini_ht,-1,sizeof(mini_ht)); #endif } + +void invalidate_block(u_int block) +{ + u_int page=get_page(block<<12); + u_int vpage=get_vpage(block<<12); + inv_debug("INVALIDATE: %x (%d)\n",block<<12,page); + //inv_debug("invalid_code[block]=%d\n",invalid_code[block]); + u_int first,last; + first=last=page; + struct ll_entry *head; + head=jump_dirty[vpage]; + //printf("page=%d vpage=%d\n",page,vpage); + while(head!=NULL) { + u_int start,end; + if(vpage>2047||(head->vaddr>>12)==block) { // Ignore vaddr hash collision + get_bounds((int)head->addr,&start,&end); + //printf("start: %x end: %x\n",start,end); + if(page<2048&&start>=0x80000000&&end<0x80000000+RAM_SIZE) { + if(((start-(u_int)rdram)>>12)<=page&&((end-1-(u_int)rdram)>>12)>=page) { + if((((start-(u_int)rdram)>>12)&2047)>12)&2047; + if((((end-1-(u_int)rdram)>>12)&2047)>last) last=((end-1-(u_int)rdram)>>12)&2047; + } + } +#ifndef DISABLE_TLB + if(page<2048&&(signed int)start>=(signed int)0xC0000000&&(signed int)end>=(signed int)0xC0000000) { + if(((start+memory_map[start>>12]-(u_int)rdram)>>12)<=page&&((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)>=page) { + if((((start+memory_map[start>>12]-(u_int)rdram)>>12)&2047)>12]-(u_int)rdram)>>12)&2047; + if((((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047)>last) last=((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047; + } + } +#endif + } + head=head->next; + } + invalidate_block_range(block,first,last); +} + void invalidate_addr(u_int addr) { +#ifdef PCSX + //static int rhits; + // this check is done by the caller + //if (inv_code_start<=addr&&addr<=inv_code_end) { rhits++; return; } + u_int page=get_page(addr); + if(page<2048) { // RAM + struct ll_entry *head; + u_int addr_min=~0, addr_max=0; + int mask=RAM_SIZE-1; + int pg1; + inv_code_start=addr&~0xfff; + inv_code_end=addr|0xfff; + pg1=page; + if (pg1>0) { + // must check previous page too because of spans.. + pg1--; + inv_code_start-=0x1000; + } + for(;pg1<=page;pg1++) { + for(head=jump_dirty[pg1];head!=NULL;head=head->next) { + u_int start,end; + get_bounds((int)head->addr,&start,&end); + if((start&mask)<=(addr&mask)&&(addr&mask)<(end&mask)) { + if(startaddr_max) addr_max=end; + } + else if(addrinv_code_start) + inv_code_start=end; + } + } + } + if (addr_min!=~0) { + inv_debug("INV ADDR: %08x hit %08x-%08x\n", addr, addr_min, addr_max); + inv_code_start=inv_code_end=~0; + invalidate_block_range(addr>>12,(addr_min&mask)>>12,(addr_max&mask)>>12); + return; + } + else { + inv_debug("INV ADDR: %08x miss, inv %08x-%08x, sk %d\n", addr, inv_code_start, inv_code_end, 0);//rhits); + } + //rhits=0; + if(page!=0) // FIXME: don't know what's up with page 0 (Klonoa) + return; + } +#endif invalidate_block(addr>>12); } + // This is called when loading a save state. // Anything could have changed, so invalidate everything. void invalidate_all_pages() @@ -1240,6 +1313,8 @@ void add_link(u_int vaddr,void *src) { u_int page=get_page(vaddr); inv_debug("add_link: %x -> %x (%d)\n",(int)src,vaddr,page); + int *ptr=(int *)(src+4); + assert((*ptr&0x0fff0000)==0x059f0000); ll_add(jump_out+page,vaddr,src); //int ptr=get_pointer(src); //inv_debug("add_link: Pointer is to %x\n",(int)ptr); @@ -1269,11 +1344,13 @@ void clean_blocks(u_int page) inv|=invalid_code[i]; } } +#ifndef DISABLE_TLB if((signed int)head->vaddr>=(signed int)0xC0000000) { u_int addr = (head->vaddr+(memory_map[head->vaddr>>12]<<2)); //printf("addr=%x start=%x end=%x\n",addr,start,end); if(addr=end) inv=1; } +#endif else if((signed int)head->vaddr>=(signed int)0x80000000+RAM_SIZE) { inv=1; } @@ -1326,8 +1403,6 @@ void mov_alloc(struct regstat *current,int i) void shiftimm_alloc(struct regstat *current,int i) { - clear_const(current,rs1[i]); - clear_const(current,rt1[i]); if(opcode2[i]<=0x3) // SLL/SRL/SRA { if(rt1[i]) { @@ -1336,8 +1411,21 @@ void shiftimm_alloc(struct regstat *current,int i) alloc_reg(current,i,rt1[i]); current->is32|=1LL<>imm[i]); + if(opcode2[i]==0x03) set_const(current,rt1[i],v>>imm[i]); + } + else clear_const(current,rt1[i]); } } + else + { + clear_const(current,rs1[i]); + clear_const(current,rt1[i]); + } + if(opcode2[i]>=0x38&&opcode2[i]<=0x3b) // DSLL/DSRL/DSRA { if(rt1[i]) { @@ -2623,7 +2711,7 @@ void shiftimm_assemble(int i,struct regstat *i_regs) t=get_reg(i_regs->regmap,rt1[i]); s=get_reg(i_regs->regmap,rs1[i]); //assert(t>=0); - if(t>=0){ + if(t>=0&&!((i_regs->isconst>>t)&1)){ if(rs1[i]==0) { emit_zeroreg(t); @@ -2817,23 +2905,7 @@ void load_assemble(int i,struct regstat *i_regs) if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE) #endif { - #ifdef PCSX - if(sp_in_mirror&&rs1[i]==29) { - emit_andimm(addr,~0x00e00000,HOST_TEMPREG); - emit_cmpimm(HOST_TEMPREG,RAM_SIZE); - fastload_reg_override=HOST_TEMPREG; - } - else - #endif - emit_cmpimm(addr,RAM_SIZE); - jaddr=(int)out; - #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK - // Hint to branch predictor that the branch is unlikely to be taken - if(rs1[i]>=28) - emit_jno_unlikely(0); - else - #endif - emit_jno(0); + jaddr=emit_fastpath_cmp_jump(i,addr,&fastload_reg_override); } } }else{ // using tlb @@ -3118,14 +3190,7 @@ void store_assemble(int i,struct regstat *i_regs) else addr=s; if(!using_tlb) { if(!c) { - #ifdef PCSX - if(sp_in_mirror&&rs1[i]==29) { - emit_andimm(addr,~0x00e00000,HOST_TEMPREG); - emit_cmpimm(HOST_TEMPREG,RAM_SIZE); - faststore_reg_override=HOST_TEMPREG; - } - else - #endif + #ifndef PCSX #ifdef R29_HACK // Strmnnrmn's speed hack if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE) @@ -3148,6 +3213,9 @@ void store_assemble(int i,struct regstat *i_regs) #endif emit_jno(0); } + #else + jaddr=emit_fastpath_cmp_jump(i,addr,&faststore_reg_override); + #endif } }else{ // using tlb int x=0; @@ -3224,7 +3292,15 @@ void store_assemble(int i,struct regstat *i_regs) } type=STORED_STUB; } - if(!using_tlb) { +#ifdef PCSX + if(jaddr) { + // PCSX store handlers don't check invcode again + reglist|=1<waswritten&(1<waswritten&(1<regmap,ROREG); if(map<0) map=HOST_TEMPREG; @@ -3676,7 +3752,7 @@ void c1ls_assemble(int i,struct regstat *i_regs) emit_writedword_indexed_tlb(th,tl,0,offset||c||s<0?temp:s,map,temp); type=STORED_STUB; } - if(!using_tlb) { + if(!using_tlb&&!(i_regs->waswritten&(1<>16)&0x1f; s=get_reg(i_regs->regmap,rs1[i]); @@ -3772,27 +3849,30 @@ void c2ls_assemble(int i,struct regstat *i_regs) } else { if(!c) { - emit_cmpimm(offset||c||s<0?ar:s,RAM_SIZE); - jaddr2=(int)out; - emit_jno(0); + jaddr2=emit_fastpath_cmp_jump(i,ar,&fastio_reg_override); } if (opcode[i]==0x32) { // LWC2 #ifdef HOST_IMM_ADDR32 if(c) emit_readword_tlb(constmap[i][s]+offset,-1,tl); else #endif - emit_readword_indexed(0,ar,tl); + int a=ar; + if(fastio_reg_override) a=fastio_reg_override; + emit_readword_indexed(0,a,tl); } if (opcode[i]==0x3a) { // SWC2 #ifdef DESTRUCTIVE_SHIFT if(!offset&&!c&&s>=0) emit_mov(s,ar); #endif - emit_writeword_indexed(tl,0,ar); + int a=ar; + if(fastio_reg_override) a=fastio_reg_override; + emit_writeword_indexed(tl,0,a); } } if(jaddr2) add_stub(type,jaddr2,(int)out,i,ar,(int)i_regs,ccadj[i],reglist); - if (opcode[i]==0x3a) { // SWC2 + if(opcode[i]==0x3a) // SWC2 + if(!(i_regs->waswritten&(1<regmap,INVCP); assert(ir>=0); @@ -3865,7 +3945,7 @@ void syscall_assemble(int i,struct regstat *i_regs) assert(ccreg==HOST_CCREG); assert(!is_delayslot); emit_movimm(start+i*4,EAX); // Get PC - emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle... + emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle... emit_jmp((int)jump_syscall_hle); // XXX } @@ -3876,7 +3956,7 @@ void hlecall_assemble(int i,struct regstat *i_regs) assert(!is_delayslot); emit_movimm(start+i*4+4,0); // Get PC emit_movimm((int)psxHLEt[source[i]&7],1); - emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // XXX + emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // XXX emit_jmp((int)jump_hlecall); } @@ -3886,12 +3966,13 @@ void intcall_assemble(int i,struct regstat *i_regs) assert(ccreg==HOST_CCREG); assert(!is_delayslot); emit_movimm(start+i*4,0); // Get PC - emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); + emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); emit_jmp((int)jump_intcall); } void ds_assemble(int i,struct regstat *i_regs) { + speculate_register_values(i); is_delayslot=1; switch(itype[i]) { case ALU: @@ -4151,6 +4232,7 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) // printf("poor load scheduling!\n"); } else if(c) { +#ifndef DISABLE_TLB if(rm>=0) { if(!entry||entry[rm]!=mgr) { if(itype[i]==STORE||itype[i]==STORELR||(opcode[i]&0x3b)==0x39||(opcode[i]&0x3b)==0x3a) { @@ -4165,6 +4247,7 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) } } } +#endif if(rs1[i]!=rt1[i]||itype[i]!=LOAD) { if(!entry||entry[ra]!=agr) { if (opcode[i]==0x22||opcode[i]==0x26) { @@ -4177,6 +4260,7 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) (using_tlb&&((signed int)constmap[i][rs]+offset)>=(signed int)0xC0000000)) #endif emit_movimm(constmap[i][rs]+offset,ra); + regs[i].loadedconst|=1<regmap,agr); @@ -4237,6 +4321,7 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) (using_tlb&&((signed int)constmap[i+1][rs]+offset)>=(signed int)0xC0000000)) #endif emit_movimm(constmap[i+1][rs]+offset,ra); + regs[i+1].loadedconst|=1<=0&&((regs[i-1].isconst>>hr)&1)&&pre[hr]==regmap[hr] + &®map[hr]==regs[i-1].regmap[hr]&&((regs[i-1].loadedconst>>hr)&1)) + { + regs[i].loadedconst|=1<=0) { //if(entry[hr]!=regmap[hr]) { - if(i==0||!((regs[i-1].isconst>>hr)&1)||pre[hr]!=regmap[hr]||bt[i]) { + if(!((regs[i].loadedconst>>hr)&1)) { if(((regs[i].isconst>>hr)&1)&®map[hr]<64&®map[hr]>0) { - int value; + int value,similar=0; if(get_final_value(hr,i,&value)) { - if(value==0) { + // see if some other register has similar value + for(hr2=0;hr2>hr2)&1)) { + if(is_similar_value(value,constmap[i][hr2])) { + similar=1; + break; + } + } + } + if(similar) { + int value2; + if(get_final_value(hr2,i,&value2)) // is this needed? + emit_movimm_from(value2,hr2,value,hr); + else + emit_movimm(value,hr); + } + else if(value==0) { emit_zeroreg(hr); } else { emit_movimm(value,hr); } } + regs[i].loadedconst|=1<>2].regmap_entry); @@ -5136,9 +5250,45 @@ add_to_linker(int addr,int target,int ext) linkcount++; } +static void ujump_assemble_write_ra(int i) +{ + int rt; + unsigned int return_address; + rt=get_reg(branch_regs[i].regmap,31); + assem_debug("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); + //assert(rt>=0); + return_address=start+i*4+8; + if(rt>=0) { + #ifdef USE_MINI_HT + if(internal_branch(branch_regs[i].is32,return_address)&&rt1[i+1]!=31) { + int temp=-1; // note: must be ds-safe + #ifdef HOST_TEMPREG + temp=HOST_TEMPREG; + #endif + if(temp>=0) do_miniht_insert(return_address,rt,temp); + else emit_movimm(return_address,rt); + } + else + #endif + { + #ifdef REG_PREFETCH + if(temp>=0) + { + if(i_regmap[temp]!=PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp); + } + #endif + emit_movimm(return_address,rt); // PC into link register + #ifdef IMM_PREFETCH + emit_prefetch(hash_table[((return_address>>16)^return_address)&0xFFFF]); + #endif + } + } +} + void ujump_assemble(int i,struct regstat *i_regs) { signed char *i_regmap=i_regs->regmap; + int ra_done=0; if(i==(ba[i]-start)>>2) assem_debug("idle loop\n"); address_generation(i+1,i_regs,regs[i].regmap_entry); #ifdef REG_PREFETCH @@ -5150,38 +5300,9 @@ void ujump_assemble(int i,struct regstat *i_regs) if(i_regmap[temp]==PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp); } #endif - if(rt1[i]==31) { - int rt; - unsigned int return_address; - rt=get_reg(branch_regs[i].regmap,31); - assem_debug("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); - //assert(rt>=0); - return_address=start+i*4+8; - if(rt>=0) { - #ifdef USE_MINI_HT - if(internal_branch(branch_regs[i].is32,return_address)&&rt1[i+1]!=31) { - int temp=-1; // note: must be ds-safe - #ifdef HOST_TEMPREG - temp=HOST_TEMPREG; - #endif - if(temp>=0) do_miniht_insert(return_address,rt,temp); - else emit_movimm(return_address,rt); - } - else - #endif - { - #ifdef REG_PREFETCH - if(temp>=0) - { - if(i_regmap[temp]!=PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp); - } - #endif - emit_movimm(return_address,rt); // PC into link register - #ifdef IMM_PREFETCH - emit_prefetch(hash_table[((return_address>>16)^return_address)&0xFFFF]); - #endif - } - } + if(rt1[i]==31&&(rt1[i]==rs1[i+1]||rt1[i]==rs2[i+1])) { + ujump_assemble_write_ra(i); // writeback ra for DS + ra_done=1; } ds_assemble(i+1,i_regs); uint64_t bc_unneeded=branch_regs[i].u; @@ -5191,6 +5312,8 @@ void ujump_assemble(int i,struct regstat *i_regs) wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32, bc_unneeded,bc_unneeded_upper); load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG); + if(!ra_done&&rt1[i]==31) + ujump_assemble_write_ra(i); int cc,adj; cc=get_reg(branch_regs[i].regmap,CCREG); assert(cc==HOST_CCREG); @@ -5199,7 +5322,7 @@ void ujump_assemble(int i,struct regstat *i_regs) if(rt1[i]==31&&temp>=0) emit_prefetchreg(temp); #endif do_cc(i,branch_regs[i].regmap,&adj,ba[i],TAKEN,0); - if(adj) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc); + if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); if(internal_branch(branch_regs[i].is32,ba[i])) assem_debug("branch: internal\n"); @@ -5214,11 +5337,33 @@ void ujump_assemble(int i,struct regstat *i_regs) } } +static void rjump_assemble_write_ra(int i) +{ + int rt,return_address; + assert(rt1[i+1]!=rt1[i]); + assert(rt2[i+1]!=rt1[i]); + rt=get_reg(branch_regs[i].regmap,rt1[i]); + assem_debug("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); + assert(rt>=0); + return_address=start+i*4+8; + #ifdef REG_PREFETCH + if(temp>=0) + { + if(i_regmap[temp]!=PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp); + } + #endif + emit_movimm(return_address,rt); // PC into link register + #ifdef IMM_PREFETCH + emit_prefetch(hash_table[((return_address>>16)^return_address)&0xFFFF]); + #endif +} + void rjump_assemble(int i,struct regstat *i_regs) { signed char *i_regmap=i_regs->regmap; int temp; int rs,cc,adj; + int ra_done=0; rs=get_reg(branch_regs[i].regmap,rs1[i]); assert(rs>=0); if(rs1[i]==rt1[i+1]||rs1[i]==rt2[i+1]) { @@ -5245,6 +5390,10 @@ void rjump_assemble(int i,struct regstat *i_regs) if(rh>=0) do_preload_rhash(rh); } #endif + if(rt1[i]!=0&&(rt1[i]==rs1[i+1]||rt1[i]==rs2[i+1])) { + rjump_assemble_write_ra(i); + ra_done=1; + } ds_assemble(i+1,i_regs); uint64_t bc_unneeded=branch_regs[i].u; uint64_t bc_unneeded_upper=branch_regs[i].uu; @@ -5254,25 +5403,8 @@ void rjump_assemble(int i,struct regstat *i_regs) wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32, bc_unneeded,bc_unneeded_upper); load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i],CCREG); - if(rt1[i]!=0) { - int rt,return_address; - assert(rt1[i+1]!=rt1[i]); - assert(rt2[i+1]!=rt1[i]); - rt=get_reg(branch_regs[i].regmap,rt1[i]); - assem_debug("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); - assert(rt>=0); - return_address=start+i*4+8; - #ifdef REG_PREFETCH - if(temp>=0) - { - if(i_regmap[temp]!=PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp); - } - #endif - emit_movimm(return_address,rt); // PC into link register - #ifdef IMM_PREFETCH - emit_prefetch(hash_table[((return_address>>16)^return_address)&0xFFFF]); - #endif - } + if(!ra_done&&rt1[i]!=0) + rjump_assemble_write_ra(i); cc=get_reg(branch_regs[i].regmap,CCREG); assert(cc==HOST_CCREG); #ifdef USE_MINI_HT @@ -5303,8 +5435,14 @@ void rjump_assemble(int i,struct regstat *i_regs) //do_cc(i,branch_regs[i].regmap,&adj,-1,TAKEN); //if(adj) emit_addimm(cc,2*(ccadj[i]+2-adj),cc); // ??? - Shouldn't happen //assert(adj==0); - emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),HOST_CCREG); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); add_stub(CC_STUB,(int)out,jump_vaddr_reg[rs],0,i,-1,TAKEN,0); +#ifdef PCSX + if(itype[i+1]==COP0&&(source[i+1]&0x3f)==0x10) + // special case for RFE + emit_jmp(0); + else +#endif emit_jns(0); //load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,-1); #ifdef USE_MINI_HT @@ -5434,7 +5572,7 @@ void cjump_assemble(int i,struct regstat *i_regs) if(unconditional) { do_cc(i,branch_regs[i].regmap,&adj,ba[i],TAKEN,0); if(i!=(ba[i]-start)>>2 || source[i+1]!=0) { - if(adj) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc); + if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); if(internal) assem_debug("branch: internal\n"); @@ -5453,7 +5591,7 @@ void cjump_assemble(int i,struct regstat *i_regs) } } else if(nop) { - emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),cc); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); int jaddr=(int)out; emit_jns(0); add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0); @@ -5461,7 +5599,7 @@ void cjump_assemble(int i,struct regstat *i_regs) else { int taken=0,nottaken=0,nottaken1=0; do_cc(i,branch_regs[i].regmap,&adj,-1,0,invert); - if(adj&&!invert) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc); + if(adj&&!invert) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); if(!only32) { assert(s1h>=0); @@ -5553,7 +5691,7 @@ void cjump_assemble(int i,struct regstat *i_regs) #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK if(match&&(!internal||!is_ds[(ba[i]-start)>>2])) { if(adj) { - emit_addimm(cc,-CLOCK_DIVIDER*adj,cc); + emit_addimm(cc,-CLOCK_ADJUST(adj),cc); add_to_linker((int)out,ba[i],internal); }else{ emit_addnop(13); @@ -5563,7 +5701,7 @@ void cjump_assemble(int i,struct regstat *i_regs) }else #endif { - if(adj) emit_addimm(cc,-CLOCK_DIVIDER*adj,cc); + if(adj) emit_addimm(cc,-CLOCK_ADJUST(adj),cc); store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); if(internal) @@ -5583,7 +5721,7 @@ void cjump_assemble(int i,struct regstat *i_regs) if(nottaken1) set_jump_target(nottaken1,(int)out); if(adj) { - if(!invert) emit_addimm(cc,CLOCK_DIVIDER*adj,cc); + if(!invert) emit_addimm(cc,CLOCK_ADJUST(adj),cc); } } // (!unconditional) } // if(ooo) @@ -5687,7 +5825,7 @@ void cjump_assemble(int i,struct regstat *i_regs) store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); do_cc(i,i_regmap,&adj,ba[i],TAKEN,0); assem_debug("cycle count (adj)\n"); - if(adj) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc); + if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); if(internal) assem_debug("branch: internal\n"); @@ -5719,7 +5857,7 @@ void cjump_assemble(int i,struct regstat *i_regs) if(cc==-1&&!likely[i]) { // Cycle count isn't in a register, temporarily load it then write it out emit_loadreg(CCREG,HOST_CCREG); - emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),HOST_CCREG); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); int jaddr=(int)out; emit_jns(0); add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0); @@ -5728,7 +5866,7 @@ void cjump_assemble(int i,struct regstat *i_regs) else{ cc=get_reg(i_regmap,CCREG); assert(cc==HOST_CCREG); - emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),cc); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); int jaddr=(int)out; emit_jns(0); add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,likely[i]?NULLDS:NOTTAKEN,0); @@ -5819,7 +5957,7 @@ void sjump_assemble(int i,struct regstat *i_regs) if(unconditional) { do_cc(i,branch_regs[i].regmap,&adj,ba[i],TAKEN,0); if(i!=(ba[i]-start)>>2 || source[i+1]!=0) { - if(adj) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc); + if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); if(internal) assem_debug("branch: internal\n"); @@ -5838,7 +5976,7 @@ void sjump_assemble(int i,struct regstat *i_regs) } } else if(nevertaken) { - emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),cc); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); int jaddr=(int)out; emit_jns(0); add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0); @@ -5846,7 +5984,7 @@ void sjump_assemble(int i,struct regstat *i_regs) else { int nottaken=0; do_cc(i,branch_regs[i].regmap,&adj,-1,0,invert); - if(adj&&!invert) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc); + if(adj&&!invert) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); if(!only32) { assert(s1h>=0); @@ -5904,7 +6042,7 @@ void sjump_assemble(int i,struct regstat *i_regs) #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK if(match&&(!internal||!is_ds[(ba[i]-start)>>2])) { if(adj) { - emit_addimm(cc,-CLOCK_DIVIDER*adj,cc); + emit_addimm(cc,-CLOCK_ADJUST(adj),cc); add_to_linker((int)out,ba[i],internal); }else{ emit_addnop(13); @@ -5914,7 +6052,7 @@ void sjump_assemble(int i,struct regstat *i_regs) }else #endif { - if(adj) emit_addimm(cc,-CLOCK_DIVIDER*adj,cc); + if(adj) emit_addimm(cc,-CLOCK_ADJUST(adj),cc); store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); if(internal) @@ -5933,7 +6071,7 @@ void sjump_assemble(int i,struct regstat *i_regs) } if(adj) { - if(!invert) emit_addimm(cc,CLOCK_DIVIDER*adj,cc); + if(!invert) emit_addimm(cc,CLOCK_ADJUST(adj),cc); } } // (!unconditional) } // if(ooo) @@ -6016,7 +6154,7 @@ void sjump_assemble(int i,struct regstat *i_regs) store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); do_cc(i,i_regmap,&adj,ba[i],TAKEN,0); assem_debug("cycle count (adj)\n"); - if(adj) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc); + if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); if(internal) assem_debug("branch: internal\n"); @@ -6047,7 +6185,7 @@ void sjump_assemble(int i,struct regstat *i_regs) if(cc==-1&&!likely[i]) { // Cycle count isn't in a register, temporarily load it then write it out emit_loadreg(CCREG,HOST_CCREG); - emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),HOST_CCREG); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); int jaddr=(int)out; emit_jns(0); add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0); @@ -6056,7 +6194,7 @@ void sjump_assemble(int i,struct regstat *i_regs) else{ cc=get_reg(i_regmap,CCREG); assert(cc==HOST_CCREG); - emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),cc); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); int jaddr=(int)out; emit_jns(0); add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,likely[i]?NULLDS:NOTTAKEN,0); @@ -6122,7 +6260,7 @@ void fjump_assemble(int i,struct regstat *i_regs) assem_debug("cycle count (adj)\n"); if(1) { int nottaken=0; - if(adj&&!invert) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc); + if(adj&&!invert) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); if(1) { assert(fs>=0); emit_testimm(fs,0x800000); @@ -6149,7 +6287,7 @@ void fjump_assemble(int i,struct regstat *i_regs) } // if(!only32) if(invert) { - if(adj) emit_addimm(cc,-CLOCK_DIVIDER*adj,cc); + if(adj) emit_addimm(cc,-CLOCK_ADJUST(adj),cc); #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK else if(match) emit_addnop(13); #endif @@ -6170,7 +6308,7 @@ void fjump_assemble(int i,struct regstat *i_regs) } if(adj) { - if(!invert) emit_addimm(cc,CLOCK_DIVIDER*adj,cc); + if(!invert) emit_addimm(cc,CLOCK_ADJUST(adj),cc); } } // (!unconditional) } // if(ooo) @@ -6222,7 +6360,7 @@ void fjump_assemble(int i,struct regstat *i_regs) store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); do_cc(i,i_regmap,&adj,ba[i],TAKEN,0); assem_debug("cycle count (adj)\n"); - if(adj) emit_addimm(cc,CLOCK_DIVIDER*(ccadj[i]+2-adj),cc); + if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc); load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); if(internal) assem_debug("branch: internal\n"); @@ -6252,7 +6390,7 @@ void fjump_assemble(int i,struct regstat *i_regs) if(cc==-1&&!likely[i]) { // Cycle count isn't in a register, temporarily load it then write it out emit_loadreg(CCREG,HOST_CCREG); - emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),HOST_CCREG); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); int jaddr=(int)out; emit_jns(0); add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0); @@ -6261,7 +6399,7 @@ void fjump_assemble(int i,struct regstat *i_regs) else{ cc=get_reg(i_regmap,CCREG); assert(cc==HOST_CCREG); - emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),cc); + emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); int jaddr=(int)out; emit_jns(0); add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,likely[i]?NULLDS:NOTTAKEN,0); @@ -6334,7 +6472,7 @@ static void pagespan_assemble(int i,struct regstat *i_regs) if((opcode[i]&0x2e)==4||opcode[i]==0x11) { // BEQ/BNE/BEQL/BNEL/BC1 load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,CCREG,CCREG); } - emit_addimm(HOST_CCREG,CLOCK_DIVIDER*(ccadj[i]+2),HOST_CCREG); + emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); if(opcode[i]==2) // J { unconditional=1; @@ -6632,16 +6770,22 @@ static void pagespan_ds() void unneeded_registers(int istart,int iend,int r) { int i; - uint64_t u,uu,b,bu; - uint64_t temp_u,temp_uu; + uint64_t u,uu,gte_u,b,bu,gte_bu; + uint64_t temp_u,temp_uu,temp_gte_u=0; uint64_t tdep; + uint64_t gte_u_unknown=0; + if(new_dynarec_hacks&NDHACK_GTE_UNNEEDED) + gte_u_unknown=~0ll; if(iend==slen-1) { u=1;uu=1; + gte_u=gte_u_unknown; }else{ u=unneeded_reg[iend+1]; uu=unneeded_reg_upper[iend+1]; u=1;uu=1; + gte_u=gte_unneeded[iend+1]; } + for (i=iend;i>=istart;i--) { //printf("unneeded registers i=%d (%d,%d) r=%d\n",i,istart,iend,r); @@ -6655,6 +6799,7 @@ void unneeded_registers(int istart,int iend,int r) // Branch out of this block, flush all regs u=1; uu=1; + gte_u=gte_u_unknown; /* Hexagon hack if(itype[i]==UJUMP&&rt1[i]==31) { @@ -6686,17 +6831,21 @@ void unneeded_registers(int istart,int iend,int r) uu&=~((1LL<>rt1[i+1])&1; @@ -6723,17 +6874,21 @@ void unneeded_registers(int istart,int iend,int r) temp_uu&=~((1LL<>rt1[i])&1; @@ -6743,8 +6898,11 @@ void unneeded_registers(int istart,int iend,int r) temp_uu&=~((1LL<>2]=1; unneeded_reg_upper[(ba[i]-start)>>2]=1; + gte_unneeded[(ba[i]-start)>>2]=gte_u_unknown; } } /*else*/ if(1) { if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000) @@ -6759,6 +6918,7 @@ void unneeded_registers(int istart,int iend,int r) // Unconditional branch u=unneeded_reg[(ba[i]-start)>>2]; uu=unneeded_reg_upper[(ba[i]-start)>>2]; + gte_u=gte_unneeded[(ba[i]-start)>>2]; branch_unneeded_reg[i]=u; branch_unneeded_reg_upper[i]=uu; //u=1; @@ -6773,10 +6933,13 @@ void unneeded_registers(int istart,int iend,int r) uu&=~((1LL<>2]; bu=unneeded_reg_upper[(ba[i]-start)>>2]; + gte_bu=gte_unneeded[(ba[i]-start)>>2]; branch_unneeded_reg[i]=b; branch_unneeded_reg_upper[i]=bu; //b=1; @@ -6791,20 +6954,25 @@ void unneeded_registers(int istart,int iend,int r) bu&=~((1LL<>2; for(n=526336;n<1048576;n++) // 0x80800000 .. 0xFFFFFFFF memory_map[n]=-1; +#endif for(n=0;n<4096;n++) ll_clear(jump_in+n); for(n=0;n<4096;n++) ll_clear(jump_out+n); for(n=0;n<4096;n++) ll_clear(jump_dirty+n); @@ -7804,6 +7979,7 @@ void new_dynarec_init() fake_pc.f.r.rd=&readmem_dword; #endif int n; + cycle_multiplier=200; new_dynarec_clear_full(); #ifdef HOST_IMM8 // Copy this into local area so we don't have to put it in every literal pool @@ -7886,12 +8062,8 @@ int new_recompile_block(int addr) //rlist(); start = (u_int)addr&~3; //assert(((u_int)addr&1)==0); + new_dynarec_did_compile=1; #ifdef PCSX - if(!sp_in_mirror&&(signed int)(psxRegs.GPR.n.sp&0xffe00000)>0x80200000&& - 0x10000<=psxRegs.GPR.n.sp&&(psxRegs.GPR.n.sp&~0xe0e00000)>21)&0x1f; - if (source[i]&0x3f) { + //if (op2 & 0x10) { + if (source[i]&0x3f) { // use this hack to support old savestates with patched gte insns if (gte_handlers[source[i]&0x3f]!=NULL) { - snprintf(insn[i], sizeof(insn[i]), "COP2 %x", source[i]&0x3f); + if (gte_regnames[source[i]&0x3f]!=NULL) + strcpy(insn[i],gte_regnames[source[i]&0x3f]); + else + snprintf(insn[i], sizeof(insn[i]), "COP2 %x", source[i]&0x3f); type=C2OP; } } @@ -8296,6 +8472,7 @@ int new_recompile_block(int addr) us2[i]=0; dep1[i]=0; dep2[i]=0; + gte_rs[i]=gte_rt[i]=0; switch(type) { case LOAD: rs1[i]=(source[i]>>21)&0x1f; @@ -8459,7 +8636,6 @@ int new_recompile_block(int addr) if(op2==16) if((source[i]&0x3f)==0x18) rs2[i]=CCREG; // ERET break; case COP1: - case COP2: rs1[i]=0; rs2[i]=0; rt1[i]=0; @@ -8469,6 +8645,23 @@ int new_recompile_block(int addr) if(op2==5) us1[i]=rs1[i]; // DMTC1 rs2[i]=CSREG; break; + case COP2: + rs1[i]=0; + rs2[i]=0; + rt1[i]=0; + rt2[i]=0; + if(op2<3) rt1[i]=(source[i]>>16)&0x1F; // MFC2/CFC2 + if(op2>3) rs1[i]=(source[i]>>16)&0x1F; // MTC2/CTC2 + rs2[i]=CSREG; + int gr=(source[i]>>11)&0x1F; + switch(op2) + { + case 0x00: gte_rs[i]=1ll<>21)&0x1F; rs2[i]=CSREG; @@ -8482,6 +8675,17 @@ int new_recompile_block(int addr) rt1[i]=0; rt2[i]=0; imm[i]=(short)source[i]; + if(op==0x32) gte_rt[i]=1ll<<((source[i]>>16)&0x1F); // LWC2 + else gte_rs[i]=1ll<<((source[i]>>16)&0x1F); // SWC2 + break; + case C2OP: + rs1[i]=0; + rs2[i]=0; + rt1[i]=0; + rt2[i]=0; + gte_rs[i]=gte_reg_reads[source[i]&0x3f]; + gte_rt[i]=gte_reg_writes[source[i]&0x3f]; + gte_rt[i]|=1ll<<63; // every op changes flags break; case FLOAT: case FCONV: @@ -8615,6 +8819,7 @@ int new_recompile_block(int addr) dirty_reg(¤t,CCREG); current.isconst=0; current.wasconst=0; + current.waswritten=0; int ds=0; int cc=0; int hr=-1; @@ -8643,6 +8848,7 @@ int new_recompile_block(int addr) if(current.regmap[hr]==0) current.regmap[hr]=-1; } current.isconst=0; + current.waswritten=0; } if(i>1) { @@ -8707,6 +8913,7 @@ int new_recompile_block(int addr) regs[i].wasconst=current.isconst; regs[i].was32=current.is32; regs[i].wasdirty=current.dirty; + regs[i].loadedconst=0; #if defined(DESTRUCTIVE_WRITEBACK) && !defined(FORCE32) // To change a dirty register from 32 to 64 bits, we must write // it out during the previous cycle (for branches, 2 cycles) @@ -8881,8 +9088,6 @@ int new_recompile_block(int addr) clear_const(¤t,rt1[i]); alloc_cc(¤t,i); dirty_reg(¤t,CCREG); - ooo[i]=1; - delayslot_alloc(¤t,i+1); if (rt1[i]==31) { alloc_reg(¤t,i,31); dirty_reg(¤t,31); @@ -8893,6 +9098,8 @@ int new_recompile_block(int addr) #endif //current.is32|=1LL<0&&(itype[i-1]==STORE||itype[i-1]==STORELR||(itype[i-1]==C2LS&&opcode[i-1]==0x3a))&&(u_int)imm[i-1]<0x800) + current.waswritten|=1<=0x800) + current.waswritten&=~(1<0) { @@ -9564,6 +9779,11 @@ int new_recompile_block(int addr) cc=0; } #ifdef PCSX + else if(itype[i]==C2OP&>e_cycletab[source[i]&0x3f]>2) + { + // GTE runs in parallel until accessed, divide by 2 for a rough guess + cc+=gte_cycletab[source[i]&0x3f]/2; + } else if(/*itype[i]==LOAD||*/itype[i]==STORE||itype[i]==C1LS) // load causes weird timing issues { cc+=2; // 2 cycle penalty (after CLOCK_DIVIDER) @@ -9593,6 +9813,7 @@ int new_recompile_block(int addr) } } if(current.regmap[HOST_BTREG]==BTREG) current.regmap[HOST_BTREG]=-1; + regs[i].waswritten=current.waswritten; } /* Pass 4 - Cull unused host registers */ @@ -10737,9 +10958,9 @@ int new_recompile_block(int addr) if(itype[slen-1]==SPAN) { bt[slen-1]=1; // Mark as a branch target so instruction can restart after exception } - + +#ifdef DISASM /* Debug/disassembly */ - if((void*)assem_debug==(void*)printf) for(i=0;i>16)!=0x1000)) { @@ -11127,7 +11350,7 @@ int new_recompile_block(int addr) store_regs_bt(regs[i-1].regmap,regs[i-1].is32,regs[i-1].dirty,start+i*4); if(regs[i-1].regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); - emit_addimm(HOST_CCREG,CLOCK_DIVIDER*(ccadj[i-1]+1),HOST_CCREG); + emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i-1]+1),HOST_CCREG); } else if(!likely[i-2]) { @@ -11150,7 +11373,7 @@ int new_recompile_block(int addr) store_regs_bt(regs[i-1].regmap,regs[i-1].is32,regs[i-1].dirty,start+i*4); if(regs[i-1].regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); - emit_addimm(HOST_CCREG,CLOCK_DIVIDER*(ccadj[i-1]+1),HOST_CCREG); + emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i-1]+1),HOST_CCREG); add_to_linker((int)out,start+i*4,0); emit_jmp(0); } @@ -11305,11 +11528,14 @@ int new_recompile_block(int addr) } #endif } + inv_code_start=inv_code_end=~0; #ifdef PCSX - // PCSX maps all RAM mirror invalid_code tests to 0x80000000..0x80000000+RAM_SIZE + // for PCSX we need to mark all mirrors too if(get_page(start)<(RAM_SIZE>>12)) for(i=start>>12;i<=(start+slen*4)>>12;i++) - invalid_code[((u_int)0x80000000>>12)|i]=0; + invalid_code[((u_int)0x00000000>>12)|(i&0x1ff)]= + invalid_code[((u_int)0x80000000>>12)|(i&0x1ff)]= + invalid_code[((u_int)0xa0000000>>12)|(i&0x1ff)]=0; #endif /* Pass 10 - Free memory by expiring oldest blocks */