X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=blobdiff_plain;f=libpcsxcore%2Fnew_dynarec%2Fnew_dynarec.c;h=9bc0f60a4776b1f0c50b846022ddf8bd600548b8;hp=28a0245f249063e8a0aa85bccad231500db9bd10;hb=0bbd14543fec5fd4f5664b676771812663235252;hpb=535d208a8473e9255919b1e5bfe0b5aa88f6992a diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 28a0245f..9bc0f60a 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -84,6 +84,7 @@ struct ll_entry u_int ba[MAXBLOCK]; char likely[MAXBLOCK]; char is_ds[MAXBLOCK]; + char ooo[MAXBLOCK]; uint64_t unneeded_reg[MAXBLOCK]; uint64_t unneeded_reg_upper[MAXBLOCK]; uint64_t branch_unneeded_reg[MAXBLOCK]; @@ -94,10 +95,9 @@ struct ll_entry signed char regmap[MAXBLOCK][HOST_REGS]; signed char regmap_entry[MAXBLOCK][HOST_REGS]; uint64_t constmap[MAXBLOCK][HOST_REGS]; - uint64_t known_value[HOST_REGS]; - u_int known_reg; struct regstat regs[MAXBLOCK]; struct regstat branch_regs[MAXBLOCK]; + signed char minimum_free_regs[MAXBLOCK]; u_int needed_reg[MAXBLOCK]; uint64_t requires_32bit[MAXBLOCK]; u_int wont_dirty[MAXBLOCK]; @@ -1086,7 +1086,6 @@ void ll_clear(struct ll_entry **head) // Dereference the pointers and remove if it matches void ll_kill_pointers(struct ll_entry *head,int addr,int shift) { - u_int old_host_addr=0; while(head) { int ptr=get_pointer(head->addr); inv_debug("EXP: Lookup pointer to %x at %x (%x)\n",(int)ptr,(int)head->addr,head->vaddr); @@ -1095,20 +1094,12 @@ void ll_kill_pointers(struct ll_entry *head,int addr,int shift) { inv_debug("EXP: Kill pointer at %x (%x)\n",(int)head->addr,head->vaddr); u_int host_addr=(u_int)kill_pointer(head->addr); - - if((host_addr>>12)!=(old_host_addr>>12)) { - #ifdef __arm__ - __clear_cache((void *)(old_host_addr&~0xfff),(void *)(old_host_addr|0xfff)); - #endif - old_host_addr=host_addr; - } + #ifdef __arm__ + needs_clear_cache[(host_addr-(u_int)BASE_ADDR)>>17]|=1<<(((host_addr-(u_int)BASE_ADDR)>>12)&31); + #endif } head=head->next; } - #ifdef __arm__ - if (old_host_addr) - __clear_cache((void *)(old_host_addr&~0xfff),(void *)(old_host_addr|0xfff)); - #endif } // This is called when we write to a compiled block (see do_invstub) @@ -1116,7 +1107,6 @@ void invalidate_page(u_int page) { struct ll_entry *head; struct ll_entry *next; - u_int old_host_addr=0; head=jump_in[page]; jump_in[page]=0; while(head!=NULL) { @@ -1131,21 +1121,13 @@ void invalidate_page(u_int page) while(head!=NULL) { inv_debug("INVALIDATE: kill pointer to %x (%x)\n",head->vaddr,(int)head->addr); u_int host_addr=(u_int)kill_pointer(head->addr); - - if((host_addr>>12)!=(old_host_addr>>12)) { - #ifdef __arm__ - __clear_cache((void *)(old_host_addr&~0xfff),(void *)(old_host_addr|0xfff)); - #endif - old_host_addr=host_addr; - } + #ifdef __arm__ + needs_clear_cache[(host_addr-(u_int)BASE_ADDR)>>17]|=1<<(((host_addr-(u_int)BASE_ADDR)>>12)&31); + #endif next=head->next; free(head); head=next; } - #ifdef __arm__ - if (old_host_addr) - __clear_cache((void *)(old_host_addr&~0xfff),(void *)(old_host_addr|0xfff)); - #endif } void invalidate_block(u_int block) { @@ -1192,6 +1174,9 @@ void invalidate_block(u_int block) for(first=page+1;first>12); } +// This is called when loading a save state. +// Anything could have changed, so invalidate everything. void invalidate_all_pages() { u_int page,n; @@ -1401,7 +1388,10 @@ void shift_alloc(struct regstat *current,int i) if(rs1[i]) alloc_reg(current,i,rs1[i]); if(rs2[i]) alloc_reg(current,i,rs2[i]); alloc_reg(current,i,rt1[i]); - if(rt1[i]==rs2[i]) alloc_reg_temp(current,i,-1); + if(rt1[i]==rs2[i]) { + alloc_reg_temp(current,i,-1); + minimum_free_regs[i]=1; + } current->is32|=1LL<is32&=~(1LL<regmap,rt1[i])<0) { // dummy load, but we still need a register to calculate the address alloc_reg_temp(current,i,-1); + minimum_free_regs[i]=1; } if(opcode[i]==0x27||opcode[i]==0x37) // LWU/LD { @@ -1616,6 +1610,7 @@ void load_alloc(struct regstat *current,int i) alloc_reg64(current,i,rt1[i]); alloc_all(current,i); alloc_reg64(current,i,FTEMP); + minimum_free_regs[i]=HOST_REGS; } else current->is32|=1LL<is32&=~(1LL<isconst=0; } @@ -1938,6 +1945,7 @@ static void pagespan_alloc(struct regstat *current,int i) current->isconst=0; current->wasconst=0; regs[i].wasconst=0; + minimum_free_regs[i]=HOST_REGS; alloc_all(current,i); alloc_cc(current,i); dirty_reg(current,CCREG); @@ -3176,14 +3184,6 @@ void store_assemble(int i,struct regstat *i_regs) } type=STORED_STUB; } - if(!using_tlb&&(!c||memtarget)) - // addr could be a temp, make sure it survives STORE*_STUB - reglist|=1<regmap,rs2[i],ccadj[i],reglist); - } if(!using_tlb) { if(!c||memtarget) { #ifdef DESTRUCTIVE_SHIFT @@ -3198,11 +3198,20 @@ void store_assemble(int i,struct regstat *i_regs) #else emit_cmpmem_indexedsr12_imm((int)invalid_code,addr,1); #endif + #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT) + emit_callne(invalidate_addr_reg[addr]); + #else jaddr2=(int)out; emit_jne(0); add_stub(INVCODE_STUB,jaddr2,(int)out,reglist|(1<regmap,rs2[i],ccadj[i],reglist); + } //if(opcode[i]==0x2B || opcode[i]==0x3F) //if(opcode[i]==0x2B || opcode[i]==0x28) //if(opcode[i]==0x2B || opcode[i]==0x29) @@ -3627,9 +3636,13 @@ void c1ls_assemble(int i,struct regstat *i_regs) #else emit_cmpmem_indexedsr12_imm((int)invalid_code,temp,1); #endif + #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT) + emit_callne(invalidate_addr_reg[temp]); + #else jaddr3=(int)out; emit_jne(0); add_stub(INVCODE_STUB,jaddr3,(int)out,reglist|(1<>2) assem_debug("idle loop\n"); - if(likely[i]) ooo=0; if(!match) invert=1; #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK if(i>(ba[i]-start)>>2) invert=1; #endif - - if(ooo) - if((rs1[i]&&(rs1[i]==rt1[i+1]||rs1[i]==rt2[i+1]))|| - (rs2[i]&&(rs2[i]==rt1[i+1]||rs2[i]==rt2[i+1]))) - { - // Write-after-read dependency prevents out of order execution - // First test branch condition, then execute delay slot, then branch - ooo=0; - } - - if(ooo) { + + if(ooo[i]) { s1l=get_reg(branch_regs[i].regmap,rs1[i]); s1h=get_reg(branch_regs[i].regmap,rs1[i]|64); s2l=get_reg(branch_regs[i].regmap,rs2[i]); @@ -5358,7 +5364,7 @@ void cjump_assemble(int i,struct regstat *i_regs) only32=(regs[i].was32>>rs1[i])&(regs[i].was32>>rs2[i])&1; } - if(ooo) { + if(ooo[i]) { // Out of order execution (delay slot first) //printf("OOOE\n"); address_generation(i+1,i_regs,regs[i].regmap_entry); @@ -5697,11 +5703,9 @@ void sjump_assemble(int i,struct regstat *i_regs) int prev_cop1_usable=cop1_usable; int unconditional=0,nevertaken=0; int only32=0; - int ooo=1; int invert=0; int internal=internal_branch(branch_regs[i].is32,ba[i]); if(i==(ba[i]-start)>>2) assem_debug("idle loop\n"); - if(likely[i]) ooo=0; if(!match) invert=1; #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK if(i>(ba[i]-start)>>2) invert=1; @@ -5710,19 +5714,7 @@ void sjump_assemble(int i,struct regstat *i_regs) //if(opcode2[i]>=0x10) return; // FIXME (BxxZAL) //assert(opcode2[i]<0x10||rs1[i]==0); // FIXME (BxxZAL) - if(ooo) { - if(rs1[i]&&(rs1[i]==rt1[i+1]||rs1[i]==rt2[i+1])) - { - // Write-after-read dependency prevents out of order execution - // First test branch condition, then execute delay slot, then branch - ooo=0; - } - if(rt1[i]==31&&(rs1[i+1]==31||rs2[i+1]==31||rt1[i+1]==31||rt2[i+1]==31)) - // BxxZAL $ra is available to delay insn, so do it in order - ooo=0; - } - - if(ooo) { + if(ooo[i]) { s1l=get_reg(branch_regs[i].regmap,rs1[i]); s1h=get_reg(branch_regs[i].regmap,rs1[i]|64); } @@ -5744,7 +5736,7 @@ void sjump_assemble(int i,struct regstat *i_regs) only32=(regs[i].was32>>rs1[i])&1; } - if(ooo) { + if(ooo[i]) { // Out of order execution (delay slot first) //printf("OOOE\n"); address_generation(i+1,i_regs,regs[i].regmap_entry); @@ -6037,25 +6029,15 @@ void fjump_assemble(int i,struct regstat *i_regs) assem_debug("fmatch=%d\n",match); int fs,cs; int eaddr; - int ooo=1; int invert=0; int internal=internal_branch(branch_regs[i].is32,ba[i]); if(i==(ba[i]-start)>>2) assem_debug("idle loop\n"); - if(likely[i]) ooo=0; if(!match) invert=1; #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK if(i>(ba[i]-start)>>2) invert=1; #endif - if(ooo) - if(itype[i+1]==FCOMP) - { - // Write-after-read dependency prevents out of order execution - // First test branch condition, then execute delay slot, then branch - ooo=0; - } - - if(ooo) { + if(ooo[i]) { fs=get_reg(branch_regs[i].regmap,FSREG); address_generation(i+1,i_regs,regs[i].regmap_entry); // Is this okay? } @@ -6074,7 +6056,7 @@ void fjump_assemble(int i,struct regstat *i_regs) cop1_usable=1; } - if(ooo) { + if(ooo[i]) { // Out of order execution (delay slot first) //printf("OOOE\n"); ds_assemble(i+1,i_regs); @@ -7932,7 +7914,8 @@ int new_recompile_block(int addr) /* Pass 1 disassembly */ for(i=0;!done;i++) { - bt[i]=0;likely[i]=0;op2=0; + bt[i]=0;likely[i]=0;ooo[i]=0;op2=0; + minimum_free_regs[i]=0; opcode[i]=op=source[i]>>26; switch(op) { @@ -8821,6 +8804,7 @@ int new_recompile_block(int addr) #endif //current.is32|=1LL<clean transition - // #ifdef DESTRUCTIVE_WRITEBACK here? + #ifdef DESTRUCTIVE_WRITEBACK if(t>0) if(get_reg(regmap_pre[t],f_regmap[hr])>=0) if((regs[t].wasdirty>>get_reg(regmap_pre[t],f_regmap[hr]))&1) f_regmap[hr]=-1; + #endif + // This check is only strictly required in the DESTRUCTIVE_WRITEBACK + // case above, however it's always a good idea. We can't hoist the + // load if the register was already allocated, so there's no point + // wasting time analyzing most of these cases. It only "succeeds" + // when the mapping was different and the load can be replaced with + // a mov, which is of negligible benefit. So such cases are + // skipped below. if(f_regmap[hr]>0) { - if(regs[t].regmap_entry[hr]<0) { + if(regs[t].regmap_entry[hr]<0&&get_reg(regmap_pre[t],f_regmap[hr])<0) { int r=f_regmap[hr]; for(j=t;j<=i;j++) { @@ -9868,6 +9858,7 @@ int new_recompile_block(int addr) // register is lower numbered than the lower-half // register. Not sure if it's worth fixing... if(get_reg(regs[j].regmap,r&63)<0) break; + if(get_reg(regs[j].regmap_entry,r&63)<0) break; if(regs[j].is32&(1LL<<(r&63))) break; } if(regs[j].regmap[hr]==f_regmap[hr]&&(f_regmap[hr]&63)1&®s[k-1].regmap[hr]==-1) { - if(itype[k-1]==STORE||itype[k-1]==STORELR - ||itype[k-1]==C1LS||itype[k-1]==SHIFT||itype[k-1]==COP1 - ||itype[k-1]==FLOAT||itype[k-1]==FCONV||itype[k-1]==FCOMP - ||itype[k-1]==COP2||itype[k-1]==C2LS||itype[k-1]==C2OP) { - if(count_free_regs(regs[k-1].regmap)<2) { - //printf("no free regs for store %x\n",start+(k-1)*4); - break; - } + if(count_free_regs(regs[k-1].regmap)<=minimum_free_regs[k-1]) { + //printf("no free regs for store %x\n",start+(k-1)*4); + break; } - else - if(itype[k-1]!=NOP&&itype[k-1]!=MOV&&itype[k-1]!=ALU&&itype[k-1]!=SHIFTIMM&&itype[k-1]!=IMM16&&itype[k-1]!=LOAD) break; if(get_reg(regs[k-1].regmap,f_regmap[hr])>=0) { //printf("no-match due to different register\n"); break; @@ -9968,13 +9952,31 @@ int new_recompile_block(int addr) } } for(k=t;k>16)!=0x1000) { + regmap_pre[k+2][hr]=f_regmap[hr]; + regs[k+2].wasdirty&=~(1<>16)==0x1000) + { + // Stop on unconditional branch + break; + } + if(itype[j]==CJUMP||itype[j]==SJUMP||itype[j]==FJUMP) + { + if(ooo[j]) { + if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j+1]) + break; + }else{ + if(count_free_regs(branch_regs[j].regmap)<=minimum_free_regs[j+1]) + break; + } + if(get_reg(branch_regs[j].regmap,f_regmap[hr])>=0) { + //printf("no-match due to different register (branch)\n"); break; } } - else if(itype[j]!=NOP&&itype[j]!=MOV&&itype[j]!=ALU&&itype[j]!=SHIFTIMM&&itype[j]!=IMM16&&itype[j]!=LOAD) break; + if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j]) { + //printf("No free regs for store %x\n",start+j*4); + break; + } if(f_regmap[hr]>=64) { if(regs[j].is32&(1LL<<(f_regmap[hr]&63))) { break; @@ -10045,17 +10060,10 @@ int new_recompile_block(int addr) if(bt[i]) { for(j=i;j