X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=blobdiff_plain;f=libpcsxcore%2Fnew_dynarec%2Fnew_dynarec.c;h=72af92d1395280cac711ed9bcdb2c922837d27f3;hp=fb6ace4725e1651034ad153ee5105d94ce445649;hb=dc990066a301c231e5982a346f4809b4e0247a51;hpb=dd3a91a17c7b45c56cde69ff39ea35185f476898 diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index fb6ace47..72af92d1 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -84,6 +84,7 @@ struct ll_entry u_int ba[MAXBLOCK]; char likely[MAXBLOCK]; char is_ds[MAXBLOCK]; + char ooo[MAXBLOCK]; uint64_t unneeded_reg[MAXBLOCK]; uint64_t unneeded_reg_upper[MAXBLOCK]; uint64_t branch_unneeded_reg[MAXBLOCK]; @@ -94,10 +95,9 @@ struct ll_entry signed char regmap[MAXBLOCK][HOST_REGS]; signed char regmap_entry[MAXBLOCK][HOST_REGS]; uint64_t constmap[MAXBLOCK][HOST_REGS]; - uint64_t known_value[HOST_REGS]; - u_int known_reg; struct regstat regs[MAXBLOCK]; struct regstat branch_regs[MAXBLOCK]; + signed char minimum_free_regs[MAXBLOCK]; u_int needed_reg[MAXBLOCK]; uint64_t requires_32bit[MAXBLOCK]; u_int wont_dirty[MAXBLOCK]; @@ -134,19 +134,21 @@ struct ll_entry #define CSREG 35 // Coprocessor status #define CCREG 36 // Cycle count #define INVCP 37 // Pointer to invalid_code -#define TEMPREG 38 -#define FTEMP 38 // FPU/LDL/LDR temporary register -#define PTEMP 39 // Prefetch temporary register -#define TLREG 40 // TLB mapping offset -#define RHASH 41 // Return address hash -#define RHTBL 42 // Return address hash table address -#define RTEMP 43 // JR/JALR address register -#define MAXREG 43 -#define AGEN1 44 // Address generation temporary register -#define AGEN2 45 // Address generation temporary register -#define MGEN1 46 // Maptable address generation temporary register -#define MGEN2 47 // Maptable address generation temporary register -#define BTREG 48 // Branch target temporary register +#define MMREG 38 // Pointer to memory_map +#define ROREG 39 // ram offset (if rdram!=0x80000000) +#define TEMPREG 40 +#define FTEMP 40 // FPU temporary register +#define PTEMP 41 // Prefetch temporary register +#define TLREG 42 // TLB mapping offset +#define RHASH 43 // Return address hash +#define RHTBL 44 // Return address hash table address +#define RTEMP 45 // JR/JALR address register +#define MAXREG 45 +#define AGEN1 46 // Address generation temporary register +#define AGEN2 47 // Address generation temporary register +#define MGEN1 48 // Maptable address generation temporary register +#define MGEN2 49 // Maptable address generation temporary register +#define BTREG 50 // Branch target temporary register /* instruction types */ #define NOP 0 // No operation @@ -1388,7 +1390,10 @@ void shift_alloc(struct regstat *current,int i) if(rs1[i]) alloc_reg(current,i,rs1[i]); if(rs2[i]) alloc_reg(current,i,rs2[i]); alloc_reg(current,i,rt1[i]); - if(rt1[i]==rs2[i]) alloc_reg_temp(current,i,-1); + if(rt1[i]==rs2[i]) { + alloc_reg_temp(current,i,-1); + minimum_free_regs[i]=1; + } current->is32|=1LL<is32&=~(1LL<regmap,rt1[i])<0) { // dummy load, but we still need a register to calculate the address alloc_reg_temp(current,i,-1); + minimum_free_regs[i]=1; } if(opcode[i]==0x27||opcode[i]==0x37) // LWU/LD { @@ -1603,6 +1612,7 @@ void load_alloc(struct regstat *current,int i) alloc_reg64(current,i,rt1[i]); alloc_all(current,i); alloc_reg64(current,i,FTEMP); + minimum_free_regs[i]=HOST_REGS; } else current->is32|=1LL<is32&=~(1LL<isconst=0; } @@ -1925,6 +1947,7 @@ static void pagespan_alloc(struct regstat *current,int i) current->isconst=0; current->wasconst=0; regs[i].wasconst=0; + minimum_free_regs[i]=HOST_REGS; alloc_all(current,i); alloc_cc(current,i); dirty_reg(current,CCREG); @@ -3163,14 +3186,6 @@ void store_assemble(int i,struct regstat *i_regs) } type=STORED_STUB; } - if(!using_tlb&&(!c||memtarget)) - // addr could be a temp, make sure it survives STORE*_STUB - reglist|=1<regmap,rs2[i],ccadj[i],reglist); - } if(!using_tlb) { if(!c||memtarget) { #ifdef DESTRUCTIVE_SHIFT @@ -3185,11 +3200,20 @@ void store_assemble(int i,struct regstat *i_regs) #else emit_cmpmem_indexedsr12_imm((int)invalid_code,addr,1); #endif + #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT) + emit_callne(invalidate_addr_reg[addr]); + #else jaddr2=(int)out; emit_jne(0); add_stub(INVCODE_STUB,jaddr2,(int)out,reglist|(1<regmap,rs2[i],ccadj[i],reglist); + } //if(opcode[i]==0x2B || opcode[i]==0x3F) //if(opcode[i]==0x2B || opcode[i]==0x28) //if(opcode[i]==0x2B || opcode[i]==0x29) @@ -3614,9 +3638,13 @@ void c1ls_assemble(int i,struct regstat *i_regs) #else emit_cmpmem_indexedsr12_imm((int)invalid_code,temp,1); #endif + #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT) + emit_callne(invalidate_addr_reg[temp]); + #else jaddr3=(int)out; emit_jne(0); add_stub(INVCODE_STUB,jaddr3,(int)out,reglist|(1<>2) assem_debug("idle loop\n"); - if(likely[i]) ooo=0; if(!match) invert=1; #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK if(i>(ba[i]-start)>>2) invert=1; #endif - - if(ooo) - if((rs1[i]&&(rs1[i]==rt1[i+1]||rs1[i]==rt2[i+1]))|| - (rs2[i]&&(rs2[i]==rt1[i+1]||rs2[i]==rt2[i+1]))) - { - // Write-after-read dependency prevents out of order execution - // First test branch condition, then execute delay slot, then branch - ooo=0; - } - - if(ooo) { + + if(ooo[i]) { s1l=get_reg(branch_regs[i].regmap,rs1[i]); s1h=get_reg(branch_regs[i].regmap,rs1[i]|64); s2l=get_reg(branch_regs[i].regmap,rs2[i]); @@ -5345,7 +5366,7 @@ void cjump_assemble(int i,struct regstat *i_regs) only32=(regs[i].was32>>rs1[i])&(regs[i].was32>>rs2[i])&1; } - if(ooo) { + if(ooo[i]) { // Out of order execution (delay slot first) //printf("OOOE\n"); address_generation(i+1,i_regs,regs[i].regmap_entry); @@ -5684,11 +5705,9 @@ void sjump_assemble(int i,struct regstat *i_regs) int prev_cop1_usable=cop1_usable; int unconditional=0,nevertaken=0; int only32=0; - int ooo=1; int invert=0; int internal=internal_branch(branch_regs[i].is32,ba[i]); if(i==(ba[i]-start)>>2) assem_debug("idle loop\n"); - if(likely[i]) ooo=0; if(!match) invert=1; #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK if(i>(ba[i]-start)>>2) invert=1; @@ -5697,19 +5716,7 @@ void sjump_assemble(int i,struct regstat *i_regs) //if(opcode2[i]>=0x10) return; // FIXME (BxxZAL) //assert(opcode2[i]<0x10||rs1[i]==0); // FIXME (BxxZAL) - if(ooo) { - if(rs1[i]&&(rs1[i]==rt1[i+1]||rs1[i]==rt2[i+1])) - { - // Write-after-read dependency prevents out of order execution - // First test branch condition, then execute delay slot, then branch - ooo=0; - } - if(rt1[i]==31&&(rs1[i+1]==31||rs2[i+1]==31||rt1[i+1]==31||rt2[i+1]==31)) - // BxxZAL $ra is available to delay insn, so do it in order - ooo=0; - } - - if(ooo) { + if(ooo[i]) { s1l=get_reg(branch_regs[i].regmap,rs1[i]); s1h=get_reg(branch_regs[i].regmap,rs1[i]|64); } @@ -5731,7 +5738,7 @@ void sjump_assemble(int i,struct regstat *i_regs) only32=(regs[i].was32>>rs1[i])&1; } - if(ooo) { + if(ooo[i]) { // Out of order execution (delay slot first) //printf("OOOE\n"); address_generation(i+1,i_regs,regs[i].regmap_entry); @@ -6024,25 +6031,15 @@ void fjump_assemble(int i,struct regstat *i_regs) assem_debug("fmatch=%d\n",match); int fs,cs; int eaddr; - int ooo=1; int invert=0; int internal=internal_branch(branch_regs[i].is32,ba[i]); if(i==(ba[i]-start)>>2) assem_debug("idle loop\n"); - if(likely[i]) ooo=0; if(!match) invert=1; #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK if(i>(ba[i]-start)>>2) invert=1; #endif - if(ooo) - if(itype[i+1]==FCOMP) - { - // Write-after-read dependency prevents out of order execution - // First test branch condition, then execute delay slot, then branch - ooo=0; - } - - if(ooo) { + if(ooo[i]) { fs=get_reg(branch_regs[i].regmap,FSREG); address_generation(i+1,i_regs,regs[i].regmap_entry); // Is this okay? } @@ -6061,7 +6058,7 @@ void fjump_assemble(int i,struct regstat *i_regs) cop1_usable=1; } - if(ooo) { + if(ooo[i]) { // Out of order execution (delay slot first) //printf("OOOE\n"); ds_assemble(i+1,i_regs); @@ -7712,20 +7709,10 @@ void disassemble_inst(int i) } } -void new_dynarec_init() +// clear the state completely, instead of just marking +// things invalid like invalidate_all_pages() does +void new_dynarec_clear_full() { - printf("Init new dynarec\n"); - out=(u_char *)BASE_ADDR; - if (mmap (out, 1<>2; for(n=526336;n<1048576;n++) // 0x80800000 .. 0xFFFFFFFF memory_map[n]=-1; + for(n=0;n<4096;n++) ll_clear(jump_in+n); + for(n=0;n<4096;n++) ll_clear(jump_out+n); + for(n=0;n<4096;n++) ll_clear(jump_dirty+n); +} + +void new_dynarec_init() +{ + printf("Init new dynarec\n"); + out=(u_char *)BASE_ADDR; + if (mmap (out, 1<>26; switch(op) { @@ -8808,6 +8818,7 @@ int new_recompile_block(int addr) #endif //current.is32|=1LL<clean transition - // #ifdef DESTRUCTIVE_WRITEBACK here? + #ifdef DESTRUCTIVE_WRITEBACK if(t>0) if(get_reg(regmap_pre[t],f_regmap[hr])>=0) if((regs[t].wasdirty>>get_reg(regmap_pre[t],f_regmap[hr]))&1) f_regmap[hr]=-1; + #endif + // This check is only strictly required in the DESTRUCTIVE_WRITEBACK + // case above, however it's always a good idea. We can't hoist the + // load if the register was already allocated, so there's no point + // wasting time analyzing most of these cases. It only "succeeds" + // when the mapping was different and the load can be replaced with + // a mov, which is of negligible benefit. So such cases are + // skipped below. if(f_regmap[hr]>0) { - if(regs[t].regmap_entry[hr]<0) { + if(regs[t].regmap_entry[hr]<0&&get_reg(regmap_pre[t],f_regmap[hr])<0) { int r=f_regmap[hr]; for(j=t;j<=i;j++) { @@ -9855,6 +9882,7 @@ int new_recompile_block(int addr) // register is lower numbered than the lower-half // register. Not sure if it's worth fixing... if(get_reg(regs[j].regmap,r&63)<0) break; + if(get_reg(regs[j].regmap_entry,r&63)<0) break; if(regs[j].is32&(1LL<<(r&63))) break; } if(regs[j].regmap[hr]==f_regmap[hr]&&(f_regmap[hr]&63)1&®s[k-1].regmap[hr]==-1) { - if(itype[k-1]==STORE||itype[k-1]==STORELR - ||itype[k-1]==C1LS||itype[k-1]==SHIFT||itype[k-1]==COP1 - ||itype[k-1]==FLOAT||itype[k-1]==FCONV||itype[k-1]==FCOMP - ||itype[k-1]==COP2||itype[k-1]==C2LS||itype[k-1]==C2OP) { - if(count_free_regs(regs[k-1].regmap)<2) { - //printf("no free regs for store %x\n",start+(k-1)*4); - break; - } + if(count_free_regs(regs[k-1].regmap)<=minimum_free_regs[k-1]) { + //printf("no free regs for store %x\n",start+(k-1)*4); + break; } - else - if(itype[k-1]!=NOP&&itype[k-1]!=MOV&&itype[k-1]!=ALU&&itype[k-1]!=SHIFTIMM&&itype[k-1]!=IMM16&&itype[k-1]!=LOAD) break; if(get_reg(regs[k-1].regmap,f_regmap[hr])>=0) { //printf("no-match due to different register\n"); break; @@ -9955,13 +9976,31 @@ int new_recompile_block(int addr) } } for(k=t;k>16)!=0x1000) { + regmap_pre[k+2][hr]=f_regmap[hr]; + regs[k+2].wasdirty&=~(1<>16)==0x1000) + { + // Stop on unconditional branch + break; + } + if(itype[j]==CJUMP||itype[j]==SJUMP||itype[j]==FJUMP) + { + if(ooo[j]) { + if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j+1]) + break; + }else{ + if(count_free_regs(branch_regs[j].regmap)<=minimum_free_regs[j+1]) + break; + } + if(get_reg(branch_regs[j].regmap,f_regmap[hr])>=0) { + //printf("no-match due to different register (branch)\n"); break; } } - else if(itype[j]!=NOP&&itype[j]!=MOV&&itype[j]!=ALU&&itype[j]!=SHIFTIMM&&itype[j]!=IMM16&&itype[j]!=LOAD) break; + if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j]) { + //printf("No free regs for store %x\n",start+j*4); + break; + } if(f_regmap[hr]>=64) { if(regs[j].is32&(1LL<<(f_regmap[hr]&63))) { break; @@ -10032,17 +10084,10 @@ int new_recompile_block(int addr) if(bt[i]) { for(j=i;j