X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=blobdiff_plain;f=libpcsxcore%2Fnew_dynarec%2Fnew_dynarec.c;h=59d42080cb7cac75c0dc8f2923f68f2a46549595;hp=ce77e00e9ab487427da036824b6cf8d375570895;hb=4a35de071887026bb6dcd6b852738a1866959df7;hpb=2167bef61daaeb12ceb59c3c577fc636e9011d6d diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index ce77e00e..59d42080 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -44,9 +44,6 @@ #define MAXBLOCK 4096 #define MAX_OUTPUT_BLOCK_SIZE 262144 -int cycle_multiplier; // 100 for 1.0 -#define CLOCK_ADJUST(x) (((x) * cycle_multiplier + 50) / 100) - struct regstat { signed char regmap_entry[HOST_REGS]; @@ -59,7 +56,8 @@ struct regstat uint64_t uu; u_int wasconst; u_int isconst; - uint64_t constmap[HOST_REGS]; + u_int loadedconst; // host regs that have constants loaded + u_int waswritten; // MIPS regs that were used as store base before }; struct ll_entry @@ -90,7 +88,6 @@ struct ll_entry static uint64_t gte_rs[MAXBLOCK]; // gte: 32 data and 32 ctl regs static uint64_t gte_rt[MAXBLOCK]; static uint64_t gte_unneeded[MAXBLOCK]; - static int gte_reads_flags; // gte flag read encountered static u_int smrv[32]; // speculated MIPS register values static u_int smrv_strong; // mask or regs that are likely to have correct values static u_int smrv_weak; // same, but somewhat less likely @@ -108,11 +105,10 @@ struct ll_entry uint64_t p32[MAXBLOCK]; uint64_t pr32[MAXBLOCK]; signed char regmap_pre[MAXBLOCK][HOST_REGS]; - signed char regmap[MAXBLOCK][HOST_REGS]; - signed char regmap_entry[MAXBLOCK][HOST_REGS]; - uint64_t constmap[MAXBLOCK][HOST_REGS]; - struct regstat regs[MAXBLOCK]; - struct regstat branch_regs[MAXBLOCK]; + static uint64_t current_constmap[HOST_REGS]; + static uint64_t constmap[MAXBLOCK][HOST_REGS]; + static struct regstat regs[MAXBLOCK]; + static struct regstat branch_regs[MAXBLOCK]; signed char minimum_free_regs[MAXBLOCK]; u_int needed_reg[MAXBLOCK]; uint64_t requires_32bit[MAXBLOCK]; @@ -143,7 +139,13 @@ struct ll_entry static const u_int using_tlb=0; #endif int new_dynarec_did_compile; + int new_dynarec_hacks; u_int stop_after_jal; +#ifndef RAM_FIXED + static u_int ram_offset; +#else + static const u_int ram_offset=0; +#endif extern u_char restore_candidate[512]; extern int cycle_count; @@ -277,6 +279,14 @@ int tracedebug=0; //#define DEBUG_CYCLE_COUNT 1 +int cycle_multiplier; // 100 for 1.0 + +static int CLOCK_ADJUST(int x) +{ + int s=(x>>31)|1; + return (x * cycle_multiplier + s * 50) / 100; +} + static void tlb_hacks() { #ifndef DISABLE_TLB @@ -341,6 +351,7 @@ static u_int get_page(u_int vaddr) return page; } +#ifndef PCSX static u_int get_vpage(u_int vaddr) { u_int vpage=(vaddr^0x80000000)>>12; @@ -350,6 +361,13 @@ static u_int get_vpage(u_int vaddr) if(vpage>2048) vpage=2048+(vpage&2047); return vpage; } +#else +// no virtual mem in PCSX +static u_int get_vpage(u_int vaddr) +{ + return get_page(vaddr); +} +#endif // Get address from virtual address // This is called from the recompiled JR/JALR instructions @@ -590,11 +608,11 @@ void set_const(struct regstat *cur,signed char reg,uint64_t value) for (hr=0;hrregmap[hr]==reg) { cur->isconst|=1<constmap[hr]=value; + current_constmap[hr]=value; } else if((cur->regmap[hr]^64)==reg) { cur->isconst|=1<constmap[hr]=value>>32; + current_constmap[hr]=value>>32; } } } @@ -628,7 +646,7 @@ uint64_t get_const(struct regstat *cur,signed char reg) if(!reg) return 0; for (hr=0;hrregmap[hr]==reg) { - return cur->constmap[hr]; + return current_constmap[hr]; } } printf("Unknown constant in r%d\n",reg); @@ -1198,7 +1216,7 @@ void invalidate_block(u_int block) if(vpage>2047||(head->vaddr>>12)==block) { // Ignore vaddr hash collision get_bounds((int)head->addr,&start,&end); //printf("start: %x end: %x\n",start,end); - if(page<2048&&start>=0x80000000&&end<0x80000000+RAM_SIZE) { + if(page<2048&&start>=(u_int)rdram&&end<(u_int)rdram+RAM_SIZE) { if(((start-(u_int)rdram)>>12)<=page&&((end-1-(u_int)rdram)>>12)>=page) { if((((start-(u_int)rdram)>>12)&2047)>12)&2047; if((((end-1-(u_int)rdram)>>12)&2047)>last) last=((end-1-(u_int)rdram)>>12)&2047; @@ -1224,14 +1242,15 @@ void invalidate_addr(u_int addr) //static int rhits; // this check is done by the caller //if (inv_code_start<=addr&&addr<=inv_code_end) { rhits++; return; } - u_int page=get_page(addr); + u_int page=get_vpage(addr); if(page<2048) { // RAM struct ll_entry *head; u_int addr_min=~0, addr_max=0; - int mask=RAM_SIZE-1; + u_int mask=RAM_SIZE-1; + u_int addr_main=0x80000000|(addr&mask); int pg1; - inv_code_start=addr&~0xfff; - inv_code_end=addr|0xfff; + inv_code_start=addr_main&~0xfff; + inv_code_end=addr_main|0xfff; pg1=page; if (pg1>0) { // must check previous page too because of spans.. @@ -1242,11 +1261,15 @@ void invalidate_addr(u_int addr) for(head=jump_dirty[pg1];head!=NULL;head=head->next) { u_int start,end; get_bounds((int)head->addr,&start,&end); - if((start&mask)<=(addr&mask)&&(addr&mask)<(end&mask)) { + if(ram_offset) { + start-=ram_offset; + end-=ram_offset; + } + if(start<=addr_main&&addr_mainaddr_max) addr_max=end; } - else if(addr>12); @@ -2906,6 +2929,10 @@ void load_assemble(int i,struct regstat *i_regs) jaddr=emit_fastpath_cmp_jump(i,addr,&fastload_reg_override); } } + else if(ram_offset&&memtarget) { + emit_addimm(addr,ram_offset,HOST_TEMPREG); + fastload_reg_override=HOST_TEMPREG; + } }else{ // using tlb int x=0; if (opcode[i]==0x20||opcode[i]==0x24) x=3; // LB/LBU @@ -2971,7 +2998,7 @@ void load_assemble(int i,struct regstat *i_regs) gen_tlb_addr_r(a,map); emit_movswl_indexed(x,a,tl); }else{ - #ifdef RAM_OFFSET + #if 1 //def RAM_OFFSET emit_movswl_indexed(x,a,tl); #else emit_movswl_indexed((int)rdram-0x80000000+x,a,tl); @@ -3058,7 +3085,7 @@ void load_assemble(int i,struct regstat *i_regs) gen_tlb_addr_r(a,map); emit_movzwl_indexed(x,a,tl); }else{ - #ifdef RAM_OFFSET + #if 1 //def RAM_OFFSET emit_movzwl_indexed(x,a,tl); #else emit_movzwl_indexed((int)rdram-0x80000000+x,a,tl); @@ -3215,6 +3242,10 @@ void store_assemble(int i,struct regstat *i_regs) jaddr=emit_fastpath_cmp_jump(i,addr,&faststore_reg_override); #endif } + else if(ram_offset&&memtarget) { + emit_addimm(addr,ram_offset,HOST_TEMPREG); + faststore_reg_override=HOST_TEMPREG; + } }else{ // using tlb int x=0; if (opcode[i]==0x28) x=3; // SB @@ -3259,7 +3290,8 @@ void store_assemble(int i,struct regstat *i_regs) gen_tlb_addr_w(a,map); emit_writehword_indexed(tl,x,a); }else - emit_writehword_indexed(tl,(int)rdram-0x80000000+x,a); + //emit_writehword_indexed(tl,(int)rdram-0x80000000+x,a); + emit_writehword_indexed(tl,x,a); } type=STOREH_STUB; } @@ -3298,7 +3330,7 @@ void store_assemble(int i,struct regstat *i_regs) jaddr=0; } #endif - if(!using_tlb) { + if(!using_tlb&&!(i_regs->waswritten&(1<regmap,rs2[i],ccadj[i],reglist); + inline_writestub(type,i,addr_val,i_regs->regmap,rs2[i],ccadj[i],reglist); + } + // basic current block modification detection.. + // not looking back as that should be in mips cache already + if(c&&start+i*4regmap==regs[i].regmap); // not delay slot + if(i_regs->regmap==regs[i].regmap) { + load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i); + wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty); + emit_movimm(start+i*4+4,0); + emit_writeword(0,(int)&pcaddr); + emit_jmp((int)do_interrupt); + } } //if(opcode[i]==0x2B || opcode[i]==0x3F) //if(opcode[i]==0x2B || opcode[i]==0x28) @@ -3572,7 +3618,7 @@ void storelr_assemble(int i,struct regstat *i_regs) } if(!c||!memtarget) add_stub(STORELR_STUB,jaddr,(int)out,i,(int)i_regs,temp,ccadj[i],reglist); - if(!using_tlb) { + if(!using_tlb&&!(i_regs->waswritten&(1<regmap,ROREG); if(map<0) map=HOST_TEMPREG; @@ -3750,7 +3796,7 @@ void c1ls_assemble(int i,struct regstat *i_regs) emit_writedword_indexed_tlb(th,tl,0,offset||c||s<0?temp:s,map,temp); type=STORED_STUB; } - if(!using_tlb) { + if(!using_tlb&&!(i_regs->waswritten&(1<waswritten&(1<regmap,INVCP); assert(ir>=0); @@ -4257,6 +4308,7 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) (using_tlb&&((signed int)constmap[i][rs]+offset)>=(signed int)0xC0000000)) #endif emit_movimm(constmap[i][rs]+offset,ra); + regs[i].loadedconst|=1<=(signed int)0xC0000000)) #endif emit_movimm(constmap[i+1][rs]+offset,ra); + regs[i+1].loadedconst|=1<=0&&((regs[i-1].isconst>>hr)&1)&&pre[hr]==regmap[hr] + &®map[hr]==regs[i-1].regmap[hr]&&((regs[i-1].loadedconst>>hr)&1)) + { + regs[i].loadedconst|=1<=0) { //if(entry[hr]!=regmap[hr]) { - if(i==0||!((regs[i-1].isconst>>hr)&1)||pre[hr]!=regmap[hr]||bt[i]) { + if(!((regs[i].loadedconst>>hr)&1)) { if(((regs[i].isconst>>hr)&1)&®map[hr]<64&®map[hr]>0) { - int value; + int value,similar=0; if(get_final_value(hr,i,&value)) { - if(value==0) { + // see if some other register has similar value + for(hr2=0;hr2>hr2)&1)) { + if(is_similar_value(value,constmap[i][hr2])) { + similar=1; + break; + } + } + } + if(similar) { + int value2; + if(get_final_value(hr2,i,&value2)) // is this needed? + emit_movimm_from(value2,hr2,value,hr); + else + emit_movimm(value,hr); + } + else if(value==0) { emit_zeroreg(hr); } else { emit_movimm(value,hr); } } + regs[i].loadedconst|=1<=istart;i--) { @@ -6761,7 +6847,7 @@ void unneeded_registers(int istart,int iend,int r) // Branch out of this block, flush all regs u=1; uu=1; - gte_u=0; + gte_u=gte_u_unknown; /* Hexagon hack if(itype[i]==UJUMP&&rt1[i]==31) { @@ -6807,7 +6893,7 @@ void unneeded_registers(int istart,int iend,int r) { u=1; uu=1; - gte_u=0; + gte_u=gte_u_unknown; } } } @@ -6850,7 +6936,7 @@ void unneeded_registers(int istart,int iend,int r) { temp_u=1; temp_uu=1; - temp_gte_u=0; + temp_gte_u=gte_u_unknown; } } tdep=(~temp_uu>>rt1[i])&1; @@ -6872,7 +6958,7 @@ void unneeded_registers(int istart,int iend,int r) }else{ unneeded_reg[(ba[i]-start)>>2]=1; unneeded_reg_upper[(ba[i]-start)>>2]=1; - gte_unneeded[(ba[i]-start)>>2]=0; + gte_unneeded[(ba[i]-start)>>2]=gte_u_unknown; } } /*else*/ if(1) { if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000) @@ -6977,6 +7063,8 @@ void unneeded_registers(int istart,int iend,int r) uu&=~(1LL<> 15) & 3; + gte_rs[i]&=~0xe3fll; + if(v==3) gte_rs[i]|=0xe00ll; + else gte_rs[i]|=3ll<<(v*2); + } break; case FLOAT: case FCONV: @@ -8787,6 +8886,7 @@ int new_recompile_block(int addr) dirty_reg(¤t,CCREG); current.isconst=0; current.wasconst=0; + current.waswritten=0; int ds=0; int cc=0; int hr=-1; @@ -8815,6 +8915,7 @@ int new_recompile_block(int addr) if(current.regmap[hr]==0) current.regmap[hr]=-1; } current.isconst=0; + current.waswritten=0; } if(i>1) { @@ -8879,6 +8980,7 @@ int new_recompile_block(int addr) regs[i].wasconst=current.isconst; regs[i].was32=current.is32; regs[i].wasdirty=current.dirty; + regs[i].loadedconst=0; #if defined(DESTRUCTIVE_WRITEBACK) && !defined(FORCE32) // To change a dirty register from 32 to 64 bits, we must write // it out during the previous cycle (for branches, 2 cycles) @@ -9441,6 +9543,14 @@ int new_recompile_block(int addr) } memcpy(regs[i].regmap,current.regmap,sizeof(current.regmap)); } + + if(i>0&&(itype[i-1]==STORE||itype[i-1]==STORELR||(itype[i-1]==C2LS&&opcode[i-1]==0x3a))&&(u_int)imm[i-1]<0x800) + current.waswritten|=1<=0x800) + current.waswritten&=~(1<0) { @@ -9461,7 +9571,7 @@ int new_recompile_block(int addr) branch_regs[i-1].is32|=1LL<<31; } memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap)); - memcpy(constmap[i],constmap[i-1],sizeof(current.constmap)); + memcpy(constmap[i],constmap[i-1],sizeof(current_constmap)); break; case RJUMP: memcpy(&branch_regs[i-1],¤t,sizeof(current)); @@ -9486,7 +9596,7 @@ int new_recompile_block(int addr) } #endif memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap)); - memcpy(constmap[i],constmap[i-1],sizeof(current.constmap)); + memcpy(constmap[i],constmap[i-1],sizeof(current_constmap)); break; case CJUMP: if((opcode[i-1]&0x3E)==4) // BEQ/BNE @@ -9522,7 +9632,7 @@ int new_recompile_block(int addr) branch_regs[i-1].isconst=0; branch_regs[i-1].wasconst=0; memcpy(&branch_regs[i-1].regmap_entry,¤t.regmap,sizeof(current.regmap)); - memcpy(constmap[i],constmap[i-1],sizeof(current.constmap)); + memcpy(constmap[i],constmap[i-1],sizeof(current_constmap)); } else if((opcode[i-1]&0x3E)==6) // BLEZ/BGTZ @@ -9555,7 +9665,7 @@ int new_recompile_block(int addr) branch_regs[i-1].isconst=0; branch_regs[i-1].wasconst=0; memcpy(&branch_regs[i-1].regmap_entry,¤t.regmap,sizeof(current.regmap)); - memcpy(constmap[i],constmap[i-1],sizeof(current.constmap)); + memcpy(constmap[i],constmap[i-1],sizeof(current_constmap)); } else // Alloc the delay slot in case the branch is taken @@ -9621,7 +9731,7 @@ int new_recompile_block(int addr) branch_regs[i-1].isconst=0; branch_regs[i-1].wasconst=0; memcpy(&branch_regs[i-1].regmap_entry,¤t.regmap,sizeof(current.regmap)); - memcpy(constmap[i],constmap[i-1],sizeof(current.constmap)); + memcpy(constmap[i],constmap[i-1],sizeof(current_constmap)); } else // Alloc the delay slot in case the branch is taken @@ -9735,7 +9845,12 @@ int new_recompile_block(int addr) { cc=0; } -#ifdef PCSX +#if defined(PCSX) && !defined(DRC_DBG) + else if(itype[i]==C2OP&>e_cycletab[source[i]&0x3f]>2) + { + // GTE runs in parallel until accessed, divide by 2 for a rough guess + cc+=gte_cycletab[source[i]&0x3f]/2; + } else if(/*itype[i]==LOAD||*/itype[i]==STORE||itype[i]==C1LS) // load causes weird timing issues { cc+=2; // 2 cycle penalty (after CLOCK_DIVIDER) @@ -9755,7 +9870,7 @@ int new_recompile_block(int addr) regs[i].is32=current.is32; regs[i].dirty=current.dirty; regs[i].isconst=current.isconst; - memcpy(constmap[i],current.constmap,sizeof(current.constmap)); + memcpy(constmap[i],current_constmap,sizeof(current_constmap)); } for(hr=0;hr=0) { @@ -9765,6 +9880,7 @@ int new_recompile_block(int addr) } } if(current.regmap[HOST_BTREG]==BTREG) current.regmap[HOST_BTREG]=-1; + regs[i].waswritten=current.waswritten; } /* Pass 4 - Cull unused host registers */ @@ -11463,7 +11579,7 @@ int new_recompile_block(int addr) // If we're within 256K of the end of the buffer, // start over from the beginning. (Is 256K enough?) - if((int)out>BASE_ADDR+(1<(u_int)BASE_ADDR+(1<>12;i<=(start+slen*4)>>12;i++) { @@ -11491,11 +11607,11 @@ int new_recompile_block(int addr) /* Pass 10 - Free memory by expiring oldest blocks */ - int end=((((int)out-BASE_ADDR)>>(TARGET_SIZE_2-16))+16384)&65535; + int end=((((int)out-(int)BASE_ADDR)>>(TARGET_SIZE_2-16))+16384)&65535; while(expirep!=end) { int shift=TARGET_SIZE_2-3; // Divide into 8 blocks - int base=BASE_ADDR+((expirep>>13)<>13)<>11)&3) {