X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=blobdiff_plain;f=libpcsxcore%2Fnew_dynarec%2Fnew_dynarec.c;h=573d0cdbfaf2f2b5f5d812b4ec01452a8a9f1ff8;hp=032d6984b128af82ba7b61e86a7d1b917a5166b6;hb=748406cf9b74d0cd0537768d68cbae926394ea5b;hpb=5194fb95c9650bcb97aa811c08cb3b051e2a9561 diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 032d6984..573d0cdb 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -1,6 +1,6 @@ /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * Mupen64plus - new_dynarec.c * - * Copyright (C) 2009-2010 Ari64 * + * Copyright (C) 2009-2011 Ari64 * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * @@ -599,6 +599,7 @@ void clear_const(struct regstat *cur,signed char reg) int is_const(struct regstat *cur,signed char reg) { int hr; + if(reg<0) return 0; if(!reg) return 1; for (hr=0;hrregmap[hr]&63)==reg) { @@ -689,6 +690,10 @@ void lsn(u_char hsn[], int i, int *preferred_reg) hsn[RHASH]=1; hsn[RHTBL]=1; } + // due to the way JAL is currently done we need DS not to evict $ra + if(i>0&&itype[i-1]==UJUMP&&rt1[i-1]==31) { + hsn[31]=0; + } // Coprocessor load/store needs FTEMP, even if not declared if(itype[i]==C1LS||itype[i]==C2LS) { hsn[FTEMP]=0; @@ -719,12 +724,6 @@ int needed_again(int r, int i) int j; int b=-1; int rn=10; - int hr; - u_char hsn[MAXREG+1]; - int preferred_reg; - - memset(hsn,10,sizeof(hsn)); - lsn(hsn,i,&preferred_reg); if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000)) { @@ -777,11 +776,7 @@ int needed_again(int r, int i) } } }*/ - for(hr=0;hru&=~1LL; // Allow allocating r0 if it's the source register if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]); - if(rt1[i]) { + if(rt1[i]&&!((current->u>>rt1[i])&1)) { alloc_reg(current,i,rt1[i]); - if(get_reg(current->regmap,rt1[i])<0) { - // dummy load, but we still need a register to calculate the address - alloc_reg_temp(current,i,-1); - minimum_free_regs[i]=1; - } + assert(get_reg(current->regmap,rt1[i])>=0); if(opcode[i]==0x27||opcode[i]==0x37) // LWU/LD { current->is32&=~(1LL<regmap,rt1[i]|64); tl=get_reg(i_regs->regmap,rt1[i]); @@ -2831,6 +2825,7 @@ void load_assemble(int i,struct regstat *i_regs) if(sp_in_mirror&&rs1[i]==29) { emit_andimm(addr,~0x00e00000,HOST_TEMPREG); emit_cmpimm(HOST_TEMPREG,RAM_SIZE); + fastload_reg_override=HOST_TEMPREG; } else #endif @@ -2851,6 +2846,7 @@ void load_assemble(int i,struct regstat *i_regs) if (opcode[i]==0x21||opcode[i]==0x25) x=2; // LH/LHU map=get_reg(i_regs->regmap,TLREG); assert(map>=0); + reglist&=~(1<=0) emit_readword_indexed((int)rdram-0x80000000,addr,th); //emit_readword_indexed((int)rdram-0x7FFFFFFC,addr,tl); @@ -3113,6 +3097,7 @@ void store_assemble(int i,struct regstat *i_regs) int jaddr=0,jaddr2,type; int memtarget=0,c=0; int agr=AGEN1+(i&1); + int faststore_reg_override=0; u_int hr,reglist=0; th=get_reg(i_regs->regmap,rs2[i]|64); tl=get_reg(i_regs->regmap,rs2[i]); @@ -3141,6 +3126,7 @@ void store_assemble(int i,struct regstat *i_regs) if(sp_in_mirror&&rs1[i]==29) { emit_andimm(addr,~0x00e00000,HOST_TEMPREG); emit_cmpimm(HOST_TEMPREG,RAM_SIZE); + faststore_reg_override=HOST_TEMPREG; } else #endif @@ -3173,6 +3159,7 @@ void store_assemble(int i,struct regstat *i_regs) if (opcode[i]==0x29) x=2; // SH map=get_reg(i_regs->regmap,TLREG); assert(map>=0); + reglist&=~(1<=0); //emit_writeword_indexed(th,(int)rdram-0x80000000,addr); @@ -3283,8 +3262,12 @@ void store_assemble(int i,struct regstat *i_regs) //if(opcode[i]==0x2B) /*if(opcode[i]==0x2B || opcode[i]==0x28 || opcode[i]==0x29 || opcode[i]==0x3F) { - //emit_pusha(); + #ifdef __i386__ + emit_pusha(); + #endif + #ifdef __arm__ save_regs(0x100f); + #endif emit_readword((int)&last_count,ECX); #ifdef __i386__ if(get_reg(i_regs->regmap,CCREG)<0) @@ -3303,8 +3286,12 @@ void store_assemble(int i,struct regstat *i_regs) emit_writeword(0,(int)&Count); #endif emit_call((int)memdebug); - //emit_popa(); + #ifdef __i386__ + emit_popa(); + #endif + #ifdef __arm__ restore_regs(0x100f); + #endif }/**/ } @@ -3363,6 +3350,7 @@ void storelr_assemble(int i,struct regstat *i_regs) }else{ // using tlb int map=get_reg(i_regs->regmap,TLREG); assert(map>=0); + reglist&=~(1<=0) emit_mov(s,temp); do_tlb_w_branch(map,c,constmap[i][s]+offset,&jaddr); @@ -3613,6 +3601,7 @@ void c1ls_assemble(int i,struct regstat *i_regs) { map=get_reg(i_regs->regmap,TLREG); assert(map>=0); + reglist&=~(1<0 && i_regmap[hr]!=CCREG) + if(i_regmap[hr]>0 && (i_regmap[hr]&63)0 && i_regmap[hr]!=CCREG) + if(i_regmap[hr]>0 && (i_regmap[hr]&63)=0&®s[t].regmap_entry[hr]<64) { + if(regs[t].regmap_entry[hr]>=0&®s[t].regmap_entry[hr]=64) { + if(regs[t].regmap_entry[hr]>=64&®s[t].regmap_entry[hr]>(regs[t].regmap_entry[hr]&63))&1) { int lr=get_reg(regs[t].regmap_entry,regs[t].regmap_entry[hr]-64); @@ -4621,7 +4610,7 @@ void load_regs_bt(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int ad } // Load 32-bit regs for(hr=0;hr=0&®s[t].regmap_entry[hr]<64) { + if(hr!=EXCLUDE_REG&®s[t].regmap_entry[hr]>=0&®s[t].regmap_entry[hr]>hr)&1) && ((i_dirty>>hr)&1) && (((i_is32&~unneeded_reg_upper[t])>>i_regmap[hr])&1) ) || (((i_is32&~regs[t].was32&~unneeded_reg_upper[t])>>(i_regmap[hr]&63))&1)) { #else @@ -4639,7 +4628,7 @@ void load_regs_bt(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int ad } //Load 64-bit regs for(hr=0;hr=64) { + if(hr!=EXCLUDE_REG&®s[t].regmap_entry[hr]>=64&®s[t].regmap_entry[hr]>(regs[t].regmap_entry[hr]&63))&1) { @@ -4680,19 +4669,19 @@ int match_bt(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr) { if(i_regmap[hr]!=regs[t].regmap_entry[hr]) { - if(regs[t].regmap_entry[hr]!=-1) + if(regs[t].regmap_entry[hr]>=0&&(regs[t].regmap_entry[hr]|64)>hr)&1) { - if(i_regmap[hr]<64) + if(i_regmap[hr]>i_regmap[hr])&1)) return 0; } - else + else if(i_regmap[hr]>=64&&i_regmap[hr]>(i_regmap[hr]&63))&1)) return 0; @@ -4866,7 +4855,7 @@ void do_cc(int i,signed char i_regmap[],int *adj,int addr,int taken,int invert) } else { - emit_cmpimm(HOST_CCREG,-2*(count+2)); + emit_cmpimm(HOST_CCREG,-CLOCK_DIVIDER*(count+2)); jaddr=(int)out; emit_jns(0); } @@ -7460,6 +7449,10 @@ void clean_registers(int istart,int iend,int wr) will_dirty_i|=will_dirty[(ba[i]-start)>>2]&(1<>2]&(1<=0) { + will_dirty_i|=((unneeded_reg[(ba[i]-start)>>2]>>(branch_regs[i].regmap[r]&63))&1)<>2]>>(branch_regs[i].regmap[r]&63))&1)<start+i*4) { // Disable recursion (for debugging) for(r=0;r>2].regmap_entry[r]) { + signed char target_reg=branch_regs[i].regmap[r]; + if(target_reg==regs[(ba[i]-start)>>2].regmap_entry[r]) { will_dirty_i&=will_dirty[(ba[i]-start)>>2]&(1<>2]&(1<=0) { + will_dirty_i&=((unneeded_reg[(ba[i]-start)>>2]>>(target_reg&63))&1)<>2]>>(target_reg&63))&1)<>2].regmap_entry[r]) { @@ -7532,7 +7526,7 @@ void clean_registers(int istart,int iend,int wr) } } } - // Merge in delay slot + // Merge in delay slot (won't dirty) for(r=0;r=0) { + else if(regmap_pre[i][r]>=0&&(nr=get_reg(regs[i].regmap,regmap_pre[i][r]))>=0) { // Register moved to a different register will_dirty_i&=~(1<=0;j--) { + if(ba[j]==start+i*4) done=j=0; // Branch into delay slot if(ba[j]==start+i*4+4) done=j=0; if(ba[j]==start+i*4+8) done=j=0; } @@ -8689,7 +8686,9 @@ int new_recompile_block(int addr) } if(temp_is32!=current.is32) { //printf("dumping 32-bit regs (%x)\n",start+i*4); - #ifdef DESTRUCTIVE_WRITEBACK + #ifndef DESTRUCTIVE_WRITEBACK + if(ds) + #endif for(hr=0;hr>2; if(t>0&&(itype[t-1]!=UJUMP&&itype[t-1]!=RJUMP&&itype[t-1]!=CJUMP&&itype[t-1]!=SJUMP&&itype[t-1]!=FJUMP)) // loop_preload can't handle jumps into delay slots - if(t<2||(itype[t-2]!=UJUMP)) // call/ret assumes no registers allocated + if(t<2||(itype[t-2]!=UJUMP&&itype[t-2]!=RJUMP)||rt1[t-2]!=31) // call/ret assumes no registers allocated for(hr=0;hr64) { @@ -9949,7 +9952,7 @@ int new_recompile_block(int addr) // a mov, which is of negligible benefit. So such cases are // skipped below. if(f_regmap[hr]>0) { - if(regs[t].regmap_entry[hr]<0&&get_reg(regmap_pre[t],f_regmap[hr])<0) { + if(regs[t].regmap[hr]==f_regmap[hr]||(regs[t].regmap_entry[hr]<0&&get_reg(regmap_pre[t],f_regmap[hr])<0)) { int r=f_regmap[hr]; for(j=t;j<=i;j++) { @@ -9988,7 +9991,7 @@ int new_recompile_block(int addr) break; } // call/ret fast path assumes no registers allocated - if(k>2&&(itype[k-3]==UJUMP||itype[k-3]==RJUMP)) { + if(k>2&&(itype[k-3]==UJUMP||itype[k-3]==RJUMP)&&rt1[k-3]==31) { break; } if(r>63) { @@ -10136,7 +10139,7 @@ int new_recompile_block(int addr) } } }else{ - int count=0; + // Non branch or undetermined branch target for(hr=0;hr=0) { + score[hr]=0;earliest_available[hr]=i+1; + loop_start[hr]=MAXBLOCK; + } + if(itype[i]==UJUMP||itype[i]==RJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP) { + if(branch_regs[i].regmap[hr]>=0) { + score[hr]=0;earliest_available[hr]=i+2; + loop_start[hr]=MAXBLOCK; + } + } + } + // No register allocations after unconditional jumps + if(itype[i]==UJUMP||itype[i]==RJUMP||(source[i]>>16)==0x1000) + { + for(hr=0;hr=0) break; + if(itype[j]==UJUMP||itype[j]==RJUMP||itype[j]==CJUMP||itype[j]==SJUMP||itype[j]==FJUMP) { + if(branch_regs[j].regmap[hr]>=0) break; + if(ooo[j]) { + if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j+1]) break; + }else{ + if(count_free_regs(branch_regs[j].regmap)<=minimum_free_regs[j+1]) break; + } + } + else if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j]) break; + if(itype[j]==UJUMP||itype[j]==RJUMP||itype[j]==CJUMP||itype[j]==SJUMP||itype[j]==FJUMP) { + int t=(ba[j]-start)>>2; + if(t=earliest_available[hr]) { + if(t==1||(t>1&&itype[t-2]!=UJUMP&&itype[t-2]!=RJUMP)||(t>1&&rt1[t-2]!=31)) { // call/ret assumes no registers allocated + // Score a point for hoisting loop invariant + if(t>16)==0x1000) + { + // Stop on unconditional branch + break; + } + else + if(itype[j]==LOAD||itype[j]==LOADLR|| + itype[j]==STORE||itype[j]==STORELR||itype[j]==C1LS) { + score[hr]++; + end[hr]=j; + } + } + } + } + // Find highest score and allocate that register + int maxscore=0; + for(hr=0;hrscore[maxscore]) { + maxscore=hr; + //printf("highest score: %d %d (%x->%x)\n",score[hr],hr,start+i*4,start+end[hr]*4); + } + } + } + if(score[maxscore]>1) + { + if(i=0) {printf("oops: %x %x was %d=%d\n",loop_start[maxscore]*4+start,j*4+start,maxscore,regs[j].regmap[maxscore]);} + assert(regs[j].regmap[maxscore]<0); + if(j>loop_start[maxscore]) regs[j].regmap_entry[maxscore]=reg; + regs[j].regmap[maxscore]=reg; + regs[j].dirty&=~(1<>16)!=0x1000) { + regmap_pre[j+2][maxscore]=reg; + regs[j+2].wasdirty&=~(1<>2; + if(t==loop_start[maxscore]) { + if(t==1||(t>1&&itype[t-2]!=UJUMP&&itype[t-2]!=RJUMP)||(t>1&&rt1[t-2]!=31)) // call/ret assumes no registers allocated + regs[t].regmap_entry[maxscore]=reg; + } + } + else + { + if(j<1||(itype[j-1]!=RJUMP&&itype[j-1]!=UJUMP&&itype[j-1]!=CJUMP&&itype[j-1]!=SJUMP&&itype[j-1]!=FJUMP)) { + regmap_pre[j+1][maxscore]=reg; + regs[j+1].wasdirty&=~(1<=0) { @@ -10283,6 +10466,7 @@ int new_recompile_block(int addr) } } } + // Load source into target register if(lt1[i+1]&&get_reg(regs[i+1].regmap,rs1[i+1])<0) { if((hr=get_reg(regs[i+1].regmap,rt1[i+1]))>=0) { @@ -10299,6 +10483,7 @@ int new_recompile_block(int addr) } } } + // Preload map address #ifndef HOST_IMM_ADDR32 if(itype[i+1]==LOAD||itype[i+1]==LOADLR||itype[i+1]==STORE||itype[i+1]==STORELR||itype[i+1]==C1LS||itype[i+1]==C2LS) { hr=get_reg(regs[i+1].regmap,TLREG); @@ -10338,6 +10523,7 @@ int new_recompile_block(int addr) } } #endif + // Address for store instruction (non-constant) if(itype[i+1]==STORE||itype[i+1]==STORELR ||(opcode[i+1]&0x3b)==0x39||(opcode[i+1]&0x3b)==0x3a) { // SB/SH/SW/SD/SWC1/SDC1/SWC2/SDC2 if(get_reg(regs[i+1].regmap,rs1[i+1])<0) { @@ -10537,6 +10723,19 @@ int new_recompile_block(int addr) } //requires_32bit[i]=is32[i]&~unneeded_reg_upper[i]; // DEBUG } +#else + for (i=slen-1;i>=0;i--) + { + if(itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP) + { + // Conditional branch + if((source[i]>>16)!=0x1000&&i>16)!=0x1000))