X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=blobdiff_plain;f=libpcsxcore%2Fnew_dynarec%2Fnew_dynarec.c;h=c1fa775d27a8c60730906b04652ebe00b316f34c;hp=c6d83c2c6b0856f9c2318ac3e233fc6851424a25;hb=311301dc26918fcb95577247257380960f2313ba;hpb=e3234ecf9665738e35a749fbb9d4120f25a0c7cf diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index c6d83c2c..c1fa775d 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -8688,7 +8688,9 @@ int new_recompile_block(int addr) } if(temp_is32!=current.is32) { //printf("dumping 32-bit regs (%x)\n",start+i*4); - #ifdef DESTRUCTIVE_WRITEBACK + #ifndef DESTRUCTIVE_WRITEBACK + if(ds) + #endif for(hr=0;hr>2; if(t>0&&(itype[t-1]!=UJUMP&&itype[t-1]!=RJUMP&&itype[t-1]!=CJUMP&&itype[t-1]!=SJUMP&&itype[t-1]!=FJUMP)) // loop_preload can't handle jumps into delay slots - if(t<2||(itype[t-2]!=UJUMP)) // call/ret assumes no registers allocated + if(t<2||(itype[t-2]!=UJUMP&&itype[t-2]!=RJUMP)||rt1[t-2]!=31) // call/ret assumes no registers allocated for(hr=0;hr64) { @@ -9953,7 +9954,7 @@ int new_recompile_block(int addr) // a mov, which is of negligible benefit. So such cases are // skipped below. if(f_regmap[hr]>0) { - if(regs[t].regmap_entry[hr]<0&&get_reg(regmap_pre[t],f_regmap[hr])<0) { + if(regs[t].regmap[hr]==f_regmap[hr]||(regs[t].regmap_entry[hr]<0&&get_reg(regmap_pre[t],f_regmap[hr])<0)) { int r=f_regmap[hr]; for(j=t;j<=i;j++) { @@ -9992,7 +9993,7 @@ int new_recompile_block(int addr) break; } // call/ret fast path assumes no registers allocated - if(k>2&&(itype[k-3]==UJUMP||itype[k-3]==RJUMP)) { + if(k>2&&(itype[k-3]==UJUMP||itype[k-3]==RJUMP)&&rt1[k-3]==31) { break; } if(r>63) { @@ -10140,7 +10141,7 @@ int new_recompile_block(int addr) } } }else{ - int count=0; + // Non branch or undetermined branch target for(hr=0;hr=0) { + score[hr]=0;earliest_available[hr]=i+1; + loop_start[hr]=MAXBLOCK; + } + if(itype[i]==UJUMP||itype[i]==RJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP) { + if(branch_regs[i].regmap[hr]>=0) { + score[hr]=0;earliest_available[hr]=i+2; + loop_start[hr]=MAXBLOCK; + } + } + } + // No register allocations after unconditional jumps + if(itype[i]==UJUMP||itype[i]==RJUMP||(source[i]>>16)==0x1000) + { + for(hr=0;hr=0) break; + if(itype[j]==UJUMP||itype[j]==RJUMP||itype[j]==CJUMP||itype[j]==SJUMP||itype[j]==FJUMP) { + if(branch_regs[j].regmap[hr]>=0) break; + if(ooo[j]) { + if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j+1]) break; + }else{ + if(count_free_regs(branch_regs[j].regmap)<=minimum_free_regs[j+1]) break; + } + } + else if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j]) break; + if(itype[j]==UJUMP||itype[j]==RJUMP||itype[j]==CJUMP||itype[j]==SJUMP||itype[j]==FJUMP) { + int t=(ba[j]-start)>>2; + if(t=earliest_available[hr]) { + if(t==1||(t>1&&itype[t-2]!=UJUMP&&itype[t-2]!=RJUMP)||(t>1&&rt1[t-2]!=31)) { // call/ret assumes no registers allocated + // Score a point for hoisting loop invariant + if(t>16)==0x1000) + { + // Stop on unconditional branch + break; + } + else + if(itype[j]==LOAD||itype[j]==LOADLR|| + itype[j]==STORE||itype[j]==STORELR||itype[j]==C1LS) { + score[hr]++; + end[hr]=j; + } + } + } + } + // Find highest score and allocate that register + int maxscore=0; + for(hr=0;hrscore[maxscore]) { + maxscore=hr; + //printf("highest score: %d %d (%x->%x)\n",score[hr],hr,start+i*4,start+end[hr]*4); + } + } + } + if(score[maxscore]>1) + { + if(i=0) {printf("oops: %x %x was %d=%d\n",loop_start[maxscore]*4+start,j*4+start,maxscore,regs[j].regmap[maxscore]);} + assert(regs[j].regmap[maxscore]<0); + if(j>loop_start[maxscore]) regs[j].regmap_entry[maxscore]=reg; + regs[j].regmap[maxscore]=reg; + regs[j].dirty&=~(1<>16)!=0x1000) { + regmap_pre[j+2][maxscore]=reg; + regs[j+2].wasdirty&=~(1<>2; + if(t==loop_start[maxscore]) { + if(t==1||(t>1&&itype[t-2]!=UJUMP&&itype[t-2]!=RJUMP)||(t>1&&rt1[t-2]!=31)) // call/ret assumes no registers allocated + regs[t].regmap_entry[maxscore]=reg; + } + } + else + { + if(j<1||(itype[j-1]!=RJUMP&&itype[j-1]!=UJUMP&&itype[j-1]!=CJUMP&&itype[j-1]!=SJUMP&&itype[j-1]!=FJUMP)) { + regmap_pre[j+1][maxscore]=reg; + regs[j+1].wasdirty&=~(1<=0) { @@ -10287,6 +10468,7 @@ int new_recompile_block(int addr) } } } + // Load source into target register if(lt1[i+1]&&get_reg(regs[i+1].regmap,rs1[i+1])<0) { if((hr=get_reg(regs[i+1].regmap,rt1[i+1]))>=0) { @@ -10303,6 +10485,7 @@ int new_recompile_block(int addr) } } } + // Preload map address #ifndef HOST_IMM_ADDR32 if(itype[i+1]==LOAD||itype[i+1]==LOADLR||itype[i+1]==STORE||itype[i+1]==STORELR||itype[i+1]==C1LS||itype[i+1]==C2LS) { hr=get_reg(regs[i+1].regmap,TLREG); @@ -10342,6 +10525,7 @@ int new_recompile_block(int addr) } } #endif + // Address for store instruction (non-constant) if(itype[i+1]==STORE||itype[i+1]==STORELR ||(opcode[i+1]&0x3b)==0x39||(opcode[i+1]&0x3b)==0x3a) { // SB/SH/SW/SD/SWC1/SDC1/SWC2/SDC2 if(get_reg(regs[i+1].regmap,rs1[i+1])<0) {