X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=libpcsxcore%2Fnew_dynarec%2Fnew_dynarec.c;h=abb0d07611dc5098a71ded4749ae0ffcfa1b7295;hb=9de8a0c3587effdbd19584cbca3baf566d1d21bd;hp=de444483f8bb4d38922d8e2f986ad78a7f2ac7b7;hpb=6cc8d23c0c5284522c62ce333a7c2286198e9db8;p=pcsx_rearmed.git diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index de444483..abb0d076 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -123,6 +123,7 @@ enum stub_type { // don't match .regmap will be written back // [i].regmap_entry - regs that must be set up if someone jumps here // [i].regmap - regs [i] insn will read/(over)write +// branch_regs[i].* - same as above but for branches, takes delay slot into account struct regstat { signed char regmap_entry[HOST_REGS]; @@ -584,14 +585,12 @@ void noinline *get_addr(u_int vaddr) //printf("TRACE: count=%d next=%d (get_addr no-match %x)\n",Count,next_interupt,vaddr); int r=new_recompile_block(vaddr); if(r==0) return get_addr(vaddr); - // Execute in unmapped page, generate pagefault execption + // generate an address error Status|=2; - Cause=(vaddr<<31)|0x8; + Cause=(vaddr<<31)|(4<<2); EPC=(vaddr&1)?vaddr-5:vaddr; BadVAddr=(vaddr&~1); - Context=(Context&0xFF80000F)|((BadVAddr>>9)&0x007FFFF0); - EntryHi=BadVAddr&0xFFFFE000; - return get_addr_ht(0x80000000); + return get_addr_ht(0x80000080); } // Look up address in hash table first void *get_addr_ht(u_int vaddr) @@ -608,10 +607,27 @@ static void clear_all_regs(signed char regmap[]) memset(regmap, -1, sizeof(regmap[0]) * HOST_REGS); } -static signed char get_reg(const signed char regmap[],int r) +static signed char get_reg(const signed char regmap[], signed char r) { int hr; - for (hr=0;hrregmap[hr]&63)==reg) { - cur->dirty|=1<regmap, reg); + if (hr >= 0) + cur->dirty |= 1<regmap[hr]==reg) { - cur->isconst|=1<regmap, reg); + if (hr >= 0) { + cur->isconst |= 1<regmap[hr]&63)==reg) { - cur->isconst&=~(1<regmap, reg); + if (hr >= 0) + cur->isconst &= ~(1<regmap[hr]&63)==reg) { - return (cur->isconst>>hr)&1; - } - } + if (reg < 0) return 0; + if (!reg) return 1; + hr = get_reg(cur->regmap, reg); + if (hr >= 0) + return (cur->isconst>>hr)&1; return 0; } -static uint32_t get_const(struct regstat *cur, signed char reg) +static uint32_t get_const(const struct regstat *cur, signed char reg) { int hr; - if(!reg) return 0; - for (hr=0;hrregmap[hr]==reg) { - return current_constmap[hr]; - } - } - SysPrintf("Unknown constant in r%d\n",reg); + if (!reg) return 0; + hr = get_reg(cur->regmap, reg); + if (hr >= 0) + return current_constmap[hr]; + + SysPrintf("Unknown constant in r%d\n", reg); abort(); } @@ -880,14 +888,14 @@ void alloc_all(struct regstat *cur,int i) for(hr=0;hrregmap[hr]&63)!=dops[i].rs1)&&((cur->regmap[hr]&63)!=dops[i].rs2)&& - ((cur->regmap[hr]&63)!=dops[i].rt1)&&((cur->regmap[hr]&63)!=dops[i].rt2)) + if((cur->regmap[hr]!=dops[i].rs1)&&(cur->regmap[hr]!=dops[i].rs2)&& + (cur->regmap[hr]!=dops[i].rt1)&&(cur->regmap[hr]!=dops[i].rt2)) { cur->regmap[hr]=-1; cur->dirty&=~(1<regmap[hr]&63)==0) + if(cur->regmap[hr]==0) { cur->regmap[hr]=-1; cur->dirty&=~(1<regmap[preferred_reg]&63]==j) { for(hr=0;hrregmap[hr]&63)==r) { + if(cur->regmap[hr]==r) { cur->regmap[hr]=-1; cur->dirty&=~(1<isconst&=~(1<>hr)&1) { assert(regmap[hr]<64); emit_storereg(r,hr); @@ -2149,7 +2157,7 @@ static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int for(hr=0;hr>(reg&63))&1) { + if(((~u)>>reg)&1) { if(reg>0) { if(((dirty_pre&~dirty)>>hr)&1) { if(reg>0&®<34) { @@ -2807,12 +2815,12 @@ static void load_assemble(int i, const struct regstat *i_regs, int ccadj_) // could be FIFO, must perform the read // ||dummy read assem_debug("(forced read)\n"); - tl=get_reg(i_regs->regmap,-1); + tl=get_reg_temp(i_regs->regmap); assert(tl>=0); } if(offset||s<0||c) addr=tl; else addr=s; - //if(tl<0) tl=get_reg(i_regs->regmap,-1); + //if(tl<0) tl=get_reg_temp(i_regs->regmap); if(tl>=0) { //printf("load_assemble: c=%d\n",c); //if(c) printf("load_assemble: const=%lx\n",(long)constmap[i][s]+offset); @@ -2943,7 +2951,7 @@ static void loadlr_assemble(int i, const struct regstat *i_regs, int ccadj_) u_int reglist=get_host_reglist(i_regs->regmap); tl=get_reg(i_regs->regmap,dops[i].rt1); s=get_reg(i_regs->regmap,dops[i].rs1); - temp=get_reg(i_regs->regmap,-1); + temp=get_reg_temp(i_regs->regmap); temp2=get_reg(i_regs->regmap,FTEMP); addr=get_reg(i_regs->regmap,AGEN1+(i&1)); assert(addr<0); @@ -3029,7 +3037,7 @@ static void store_assemble(int i, const struct regstat *i_regs, int ccadj_) tl=get_reg(i_regs->regmap,dops[i].rs2); s=get_reg(i_regs->regmap,dops[i].rs1); temp=get_reg(i_regs->regmap,agr); - if(temp<0) temp=get_reg(i_regs->regmap,-1); + if(temp<0) temp=get_reg_temp(i_regs->regmap); offset=imm[i]; if(s>=0) { c=(i_regs->wasconst>>s)&1; @@ -3154,7 +3162,7 @@ static void storelr_assemble(int i, const struct regstat *i_regs, int ccadj_) tl=get_reg(i_regs->regmap,dops[i].rs2); s=get_reg(i_regs->regmap,dops[i].rs1); temp=get_reg(i_regs->regmap,agr); - if(temp<0) temp=get_reg(i_regs->regmap,-1); + if(temp<0) temp=get_reg_temp(i_regs->regmap); offset=imm[i]; if(s>=0) { c=(i_regs->isconst>>s)&1; @@ -3583,7 +3591,7 @@ static void multdiv_do_stall(int i, const struct regstat *i_regs) { int j, known_cycles = 0; u_int reglist = get_host_reglist(i_regs->regmap); - int rtmp = get_reg(i_regs->regmap, -1); + int rtmp = get_reg_temp(i_regs->regmap); if (rtmp < 0) rtmp = reglist_find_free(reglist); if (HACK_ENABLED(NDHACK_NO_STALLS)) @@ -3732,7 +3740,7 @@ static void c2ls_assemble(int i, const struct regstat *i_regs, int ccadj_) // get the address if (dops[i].opcode==0x3a) { // SWC2 ar=get_reg(i_regs->regmap,agr); - if(ar<0) ar=get_reg(i_regs->regmap,-1); + if(ar<0) ar=get_reg_temp(i_regs->regmap); reglist|=1<>11) & 0x1f; - signed char temp = get_reg(i_regs->regmap, -1); + signed char temp = get_reg_temp(i_regs->regmap); if (!HACK_ENABLED(NDHACK_NO_STALLS)) { u_int reglist = reglist_exclude(get_host_reglist(i_regs->regmap), temp, -1); @@ -3952,8 +3960,7 @@ static void syscall_assemble(int i, const struct regstat *i_regs, int ccadj_) void *func = (dops[i].opcode2 == 0x0C) ? (is_delayslot ? jump_syscall_ds : jump_syscall) : (is_delayslot ? jump_break_ds : jump_break); - signed char ccreg = get_reg(i_regs->regmap, CCREG); - assert(ccreg == HOST_CCREG); + assert(get_reg(i_regs->regmap, CCREG) == HOST_CCREG); emit_movimm(start + i*4, 2); // pc emit_addimm(HOST_CCREG, ccadj_ + CLOCK_ADJUST(1), HOST_CCREG); emit_far_jump(func); @@ -4218,7 +4225,7 @@ static void wb_invalidate(signed char pre[],signed char entry[],uint64_t dirty,u for(hr=0;hr=0&&(pre[hr]&63)=0&&pre[hr]=0) { emit_mov(hr,nr); @@ -4293,7 +4300,7 @@ void address_generation(int i, const struct regstat *i_regs, signed char entry[] int agr=AGEN1+(i&1); if(dops[i].itype==LOAD) { ra=get_reg(i_regs->regmap,dops[i].rt1); - if(ra<0) ra=get_reg(i_regs->regmap,-1); + if(ra<0) ra=get_reg_temp(i_regs->regmap); assert(ra>=0); } if(dops[i].itype==LOADLR) { @@ -4301,14 +4308,14 @@ void address_generation(int i, const struct regstat *i_regs, signed char entry[] } if(dops[i].itype==STORE||dops[i].itype==STORELR) { ra=get_reg(i_regs->regmap,agr); - if(ra<0) ra=get_reg(i_regs->regmap,-1); + if(ra<0) ra=get_reg_temp(i_regs->regmap); } if(dops[i].itype==C2LS) { if ((dops[i].opcode&0x3b)==0x31||(dops[i].opcode&0x3b)==0x32) // LWC1/LDC1/LWC2/LDC2 ra=get_reg(i_regs->regmap,FTEMP); else { // SWC1/SDC1/SWC2/SDC2 ra=get_reg(i_regs->regmap,agr); - if(ra<0) ra=get_reg(i_regs->regmap,-1); + if(ra<0) ra=get_reg_temp(i_regs->regmap); } } int rs=get_reg(i_regs->regmap,dops[i].rs1); @@ -4555,7 +4562,7 @@ static void load_all_regs(const signed char i_regmap[]) emit_zeroreg(hr); } else - if(i_regmap[hr]>0 && (i_regmap[hr]&63)0 && i_regmap[hr]0 && (i_regmap[hr]&63)0 && i_regmap[hr]regmap[hr]&63)!=dops[i].rs1 && - (i_regs->regmap[hr]&63)!=dops[i].rs2 ) + i_regs->regmap[hr]!=dops[i].rs1 && + i_regs->regmap[hr]!=dops[i].rs2 ) { addr=hr++;break; } @@ -5913,8 +5922,8 @@ static void pagespan_assemble(int i, const struct regstat *i_regs) while(hrregmap[hr]&63)!=dops[i].rs1 && - (i_regs->regmap[hr]&63)!=dops[i].rs2 ) + i_regs->regmap[hr]!=dops[i].rs1 && + i_regs->regmap[hr]!=dops[i].rs2 ) { alt=hr++;break; } @@ -5925,8 +5934,8 @@ static void pagespan_assemble(int i, const struct regstat *i_regs) while(hrregmap[hr]&63)!=dops[i].rs1 && - (i_regs->regmap[hr]&63)!=dops[i].rs2 ) + i_regs->regmap[hr]!=dops[i].rs1 && + i_regs->regmap[hr]!=dops[i].rs2 ) { ntaddr=hr;break; } @@ -6122,7 +6131,7 @@ static void pagespan_ds() } int btaddr=get_reg(regs[0].regmap,BTREG); if(btaddr<0) { - btaddr=get_reg(regs[0].regmap,-1); + btaddr=get_reg_temp(regs[0].regmap); emit_readword(&branch_target,btaddr); } assert(btaddr!=HOST_CCREG); @@ -6144,8 +6153,21 @@ static void pagespan_ds() load_regs_bt(regs[0].regmap,regs[0].dirty,start+4); } +static void check_regmap(signed char *regmap) +{ +#ifndef NDEBUG + int i,j; + for (i = 0; i < HOST_REGS; i++) { + if (regmap[i] < 0) + continue; + for (j = i + 1; j < HOST_REGS; j++) + assert(regmap[i] != regmap[j]); + } +#endif +} + // Basic liveness analysis for MIPS registers -void unneeded_registers(int istart,int iend,int r) +static void unneeded_registers(int istart,int iend,int r) { int i; uint64_t u,gte_u,b,gte_b; @@ -6315,6 +6337,7 @@ void clean_registers(int istart,int iend,int wr) } for (i=iend;i>=istart;i--) { + __builtin_prefetch(regs[i-1].regmap); if(dops[i].is_jump) { if(ba[i]=(start+slen*4)) @@ -6328,18 +6351,18 @@ void clean_registers(int istart,int iend,int wr) // Merge in delay slot (will dirty) for(r=0;r33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<33) temp_will_dirty&=~(1<33) temp_will_dirty&=~(1<33) temp_will_dirty&=~(1<33) temp_will_dirty&=~(1<33) temp_will_dirty&=~(1<33) temp_will_dirty&=~(1<33) temp_will_dirty&=~(1<33) temp_will_dirty&=~(1<0 && (regmap_pre[i][r]&63)<34) { - temp_will_dirty|=((unneeded_reg[i]>>(regmap_pre[i][r]&63))&1)<>(regmap_pre[i][r]&63))&1)<0 && regmap_pre[i][r]<34) { + temp_will_dirty|=((unneeded_reg[i]>>regmap_pre[i][r])&1)<>regmap_pre[i][r])&1)<>2]&(1<=0) { - will_dirty_i|=((unneeded_reg[(ba[i]-start)>>2]>>(branch_regs[i].regmap[r]&63))&1)<>2]>>(branch_regs[i].regmap[r]&63))&1)<>2]>>branch_regs[i].regmap[r])&1)<>2]>>branch_regs[i].regmap[r])&1)<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<start+i*4) { // Disable recursion (for debugging) + //if(ba[i]>start+i*4) // Disable recursion (for debugging) for(r=0;r>2]&(1<=0) { - will_dirty_i&=((unneeded_reg[(ba[i]-start)>>2]>>(target_reg&63))&1)<>2]>>(target_reg&63))&1)<>2]>>target_reg)&1)<>2]>>target_reg)&1)<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<33) will_dirty_i&=~(1<istart) { if (!dops[i].is_jump) { // Don't store a register immediately after writing it, // may prevent dual-issue. - if((regs[i].regmap[r]&63)==dops[i-1].rt1) wont_dirty_i|=1<0 && (regmap_pre[i][r]&63)<34) { - will_dirty_i|=((unneeded_reg[i]>>(regmap_pre[i][r]&63))&1)<>(regmap_pre[i][r]&63))&1)<0 && regmap_pre[i][r]<34) { + will_dirty_i|=((unneeded_reg[i]>>regmap_pre[i][r])&1)<>regmap_pre[i][r])&1)<>r)&1));*/ @@ -6715,6 +6736,24 @@ void clean_registers(int istart,int iend,int wr) } #ifdef DISASM +#include +void print_regmap(const char *name, const signed char *regmap) +{ + char buf[5]; + int i, l; + fputs(name, stdout); + for (i = 0; i < HOST_REGS; i++) { + l = 0; + if (regmap[i] >= 0) + l = snprintf(buf, sizeof(buf), "$%d", regmap[i]); + for (; l < 3; l++) + buf[l] = ' '; + buf[l] = 0; + printf(" r%d=%s", i, buf); + } + fputs("\n", stdout); +} + /* disassembly */ void disassemble_inst(int i) { @@ -6800,6 +6839,16 @@ void disassemble_inst(int i) //printf (" %s %8x\n",insn[i],source[i]); printf (" %x: %s\n",start+i*4,insn[i]); } + return; + printf("D: %"PRIu64" WD: %"PRIu64" U: %"PRIu64"\n", + regs[i].dirty, regs[i].wasdirty, unneeded_reg[i]); + print_regmap("pre: ", regmap_pre[i]); + print_regmap("entry: ", regs[i].regmap_entry); + print_regmap("map: ", regs[i].regmap); + if (dops[i].is_jump) { + print_regmap("bentry:", branch_regs[i].regmap_entry); + print_regmap("bmap: ", branch_regs[i].regmap); + } } #else static void disassemble_inst(int i) {} @@ -7175,8 +7224,12 @@ int new_recompile_block(u_int addr) source = get_source_start(start, &pagelimit); if (source == NULL) { - SysPrintf("Compile at bogus memory address: %08x\n", addr); - abort(); + if (addr != hack_addr) { + SysPrintf("Compile at bogus memory address: %08x\n", addr); + hack_addr = addr; + } + //abort(); + return -1; } /* Pass 1: disassemble */ @@ -7191,7 +7244,7 @@ int new_recompile_block(u_int addr) /* Pass 10: garbage collection / free memory */ int j; - int done=0; + int done = 0, ni_count = 0; unsigned int type,op,op2; //printf("addr = %x source = %x %x\n", addr,source,source[0]); @@ -7681,7 +7734,7 @@ int new_recompile_block(u_int addr) assert(start+i*4 8 || dops[i].opcode == 0x11)) { done=stop_after_jal=1; SysPrintf("Disabled speculative precompilation\n"); } @@ -8060,7 +8113,7 @@ int new_recompile_block(u_int addr) if(r!=regmap_pre[i][hr]) { // TODO: delay slot (?) or=get_reg(regmap_pre[i],r); // Get old mapping for this register - if(or<0||(r&63)>=TEMPREG){ + if(or<0||r>=TEMPREG){ regs[i].regmap_entry[hr]=-1; } else @@ -8068,7 +8121,7 @@ int new_recompile_block(u_int addr) // Just move it to a different register regs[i].regmap_entry[hr]=r; // If it was dirty before, it's still dirty - if((regs[i].wasdirty>>or)&1) dirty_reg(¤t,r&63); + if((regs[i].wasdirty>>or)&1) dirty_reg(¤t,r); } } else @@ -8404,8 +8457,8 @@ int new_recompile_block(u_int addr) // Merge in delay slot for(hr=0;hr0&&!dops[i].bt&&((regs[i].wasdirty>>hr)&1)) { if((regmap_pre[i][hr]>0&&!((unneeded_reg[i]>>regmap_pre[i][hr])&1))) { - if(dops[i-1].rt1==(regmap_pre[i][hr]&63)) nr|=1<0&&!((unneeded_reg[i]>>regs[i].regmap_entry[hr])&1))) { - if(dops[i-1].rt1==(regs[i].regmap_entry[hr]&63)) nr|=1< %x, %x %d/%d\n",start+i*4,ba[i],start+j*4,hr,r); if(r<34&&((unneeded_reg[j]>>r)&1)) break; assert(r < 64); - if(regs[j].regmap[hr]==f_regmap[hr]&&(f_regmap[hr]&63) %x, %x %d/%d\n",start+i*4,ba[i],start+j*4,hr,r); int k; if(regs[i].regmap[hr]==-1&&branch_regs[i].regmap[hr]==-1) { + if(get_reg(regs[i].regmap,f_regmap[hr])>=0) break; if(get_reg(regs[i+2].regmap,f_regmap[hr])>=0) break; - if(r>63) { - if(get_reg(regs[i].regmap,r&63)<0) break; - if(get_reg(branch_regs[i].regmap,r&63)<0) break; - } k=i; while(k>1&®s[k-1].regmap[hr]==-1) { if(count_free_regs(regs[k-1].regmap)<=minimum_free_regs[k-1]) { @@ -8669,7 +8723,6 @@ int new_recompile_block(u_int addr) if(k>2&&(dops[k-3].itype==UJUMP||dops[k-3].itype==RJUMP)&&dops[k-3].rt1==31) { break; } - assert(r < 64); k--; } if(regs[k-1].regmap[hr]==f_regmap[hr]&®map_pre[k][hr]==f_regmap[hr]) { @@ -8947,7 +9000,7 @@ int new_recompile_block(u_int addr) ||(dops[i+1].opcode&0x3b)==0x39||(dops[i+1].opcode&0x3b)==0x3a) { // SB/SH/SW/SD/SWC1/SDC1/SWC2/SDC2 if(get_reg(regs[i+1].regmap,dops[i+1].rs1)<0) { hr=get_reg2(regs[i].regmap,regs[i+1].regmap,-1); - if(hr<0) hr=get_reg(regs[i+1].regmap,-1); + if(hr<0) hr=get_reg_temp(regs[i+1].regmap); else { regs[i+1].regmap[hr]=AGEN1+((i+1)&1); regs[i+1].isconst&=~(1<=0&®s[i].regmap[hr]<0) { int rs=get_reg(regs[i+1].regmap,dops[i+1].rs1); @@ -9213,6 +9266,10 @@ int new_recompile_block(u_int addr) } for(i=0;i