X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=blobdiff_plain;f=libpcsxcore%2Fnew_dynarec%2Fnew_dynarec.c;h=6f5ca8f7a14914f577bef289bf799aefed13258e;hp=28a0245f249063e8a0aa85bccad231500db9bd10;hb=b12c9fb877aa40666d5ad367ecca152cfa2edb64;hpb=535d208a8473e9255919b1e5bfe0b5aa88f6992a diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 28a0245f..6f5ca8f7 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -84,6 +84,7 @@ struct ll_entry u_int ba[MAXBLOCK]; char likely[MAXBLOCK]; char is_ds[MAXBLOCK]; + char ooo[MAXBLOCK]; uint64_t unneeded_reg[MAXBLOCK]; uint64_t unneeded_reg_upper[MAXBLOCK]; uint64_t branch_unneeded_reg[MAXBLOCK]; @@ -94,10 +95,9 @@ struct ll_entry signed char regmap[MAXBLOCK][HOST_REGS]; signed char regmap_entry[MAXBLOCK][HOST_REGS]; uint64_t constmap[MAXBLOCK][HOST_REGS]; - uint64_t known_value[HOST_REGS]; - u_int known_reg; struct regstat regs[MAXBLOCK]; struct regstat branch_regs[MAXBLOCK]; + signed char minimum_free_regs[MAXBLOCK]; u_int needed_reg[MAXBLOCK]; uint64_t requires_32bit[MAXBLOCK]; u_int wont_dirty[MAXBLOCK]; @@ -121,7 +121,11 @@ struct ll_entry char shadow[1048576] __attribute__((aligned(16))); void *copy; int expirep; +#ifndef PCSX u_int using_tlb; +#else + static const u_int using_tlb=0; +#endif u_int stop_after_jal; extern u_char restore_candidate[512]; extern int cycle_count; @@ -134,19 +138,21 @@ struct ll_entry #define CSREG 35 // Coprocessor status #define CCREG 36 // Cycle count #define INVCP 37 // Pointer to invalid_code -#define TEMPREG 38 -#define FTEMP 38 // FPU/LDL/LDR temporary register -#define PTEMP 39 // Prefetch temporary register -#define TLREG 40 // TLB mapping offset -#define RHASH 41 // Return address hash -#define RHTBL 42 // Return address hash table address -#define RTEMP 43 // JR/JALR address register -#define MAXREG 43 -#define AGEN1 44 // Address generation temporary register -#define AGEN2 45 // Address generation temporary register -#define MGEN1 46 // Maptable address generation temporary register -#define MGEN2 47 // Maptable address generation temporary register -#define BTREG 48 // Branch target temporary register +#define MMREG 38 // Pointer to memory_map +#define ROREG 39 // ram offset (if rdram!=0x80000000) +#define TEMPREG 40 +#define FTEMP 40 // FPU temporary register +#define PTEMP 41 // Prefetch temporary register +#define TLREG 42 // TLB mapping offset +#define RHASH 43 // Return address hash +#define RHTBL 44 // Return address hash table address +#define RTEMP 45 // JR/JALR address register +#define MAXREG 45 +#define AGEN1 46 // Address generation temporary register +#define AGEN2 47 // Address generation temporary register +#define MGEN1 48 // Maptable address generation temporary register +#define MGEN2 49 // Maptable address generation temporary register +#define BTREG 50 // Branch target temporary register /* instruction types */ #define NOP 0 // No operation @@ -1086,7 +1092,6 @@ void ll_clear(struct ll_entry **head) // Dereference the pointers and remove if it matches void ll_kill_pointers(struct ll_entry *head,int addr,int shift) { - u_int old_host_addr=0; while(head) { int ptr=get_pointer(head->addr); inv_debug("EXP: Lookup pointer to %x at %x (%x)\n",(int)ptr,(int)head->addr,head->vaddr); @@ -1095,20 +1100,12 @@ void ll_kill_pointers(struct ll_entry *head,int addr,int shift) { inv_debug("EXP: Kill pointer at %x (%x)\n",(int)head->addr,head->vaddr); u_int host_addr=(u_int)kill_pointer(head->addr); - - if((host_addr>>12)!=(old_host_addr>>12)) { - #ifdef __arm__ - __clear_cache((void *)(old_host_addr&~0xfff),(void *)(old_host_addr|0xfff)); - #endif - old_host_addr=host_addr; - } + #ifdef __arm__ + needs_clear_cache[(host_addr-(u_int)BASE_ADDR)>>17]|=1<<(((host_addr-(u_int)BASE_ADDR)>>12)&31); + #endif } head=head->next; } - #ifdef __arm__ - if (old_host_addr) - __clear_cache((void *)(old_host_addr&~0xfff),(void *)(old_host_addr|0xfff)); - #endif } // This is called when we write to a compiled block (see do_invstub) @@ -1116,7 +1113,6 @@ void invalidate_page(u_int page) { struct ll_entry *head; struct ll_entry *next; - u_int old_host_addr=0; head=jump_in[page]; jump_in[page]=0; while(head!=NULL) { @@ -1131,21 +1127,13 @@ void invalidate_page(u_int page) while(head!=NULL) { inv_debug("INVALIDATE: kill pointer to %x (%x)\n",head->vaddr,(int)head->addr); u_int host_addr=(u_int)kill_pointer(head->addr); - - if((host_addr>>12)!=(old_host_addr>>12)) { - #ifdef __arm__ - __clear_cache((void *)(old_host_addr&~0xfff),(void *)(old_host_addr|0xfff)); - #endif - old_host_addr=host_addr; - } + #ifdef __arm__ + needs_clear_cache[(host_addr-(u_int)BASE_ADDR)>>17]|=1<<(((host_addr-(u_int)BASE_ADDR)>>12)&31); + #endif next=head->next; free(head); head=next; } - #ifdef __arm__ - if (old_host_addr) - __clear_cache((void *)(old_host_addr&~0xfff),(void *)(old_host_addr|0xfff)); - #endif } void invalidate_block(u_int block) { @@ -1192,9 +1180,15 @@ void invalidate_block(u_int block) for(first=page+1;first>12)|page]=1; +#endif #ifndef DISABLE_TLB // If there is a valid TLB entry for this page, remove write protect if(tlb_LUT_w[block]) { @@ -1216,6 +1210,8 @@ void invalidate_addr(u_int addr) { invalidate_block(addr>>12); } +// This is called when loading a save state. +// Anything could have changed, so invalidate everything. void invalidate_all_pages() { u_int page,n; @@ -1401,7 +1397,10 @@ void shift_alloc(struct regstat *current,int i) if(rs1[i]) alloc_reg(current,i,rs1[i]); if(rs2[i]) alloc_reg(current,i,rs2[i]); alloc_reg(current,i,rt1[i]); - if(rt1[i]==rs2[i]) alloc_reg_temp(current,i,-1); + if(rt1[i]==rs2[i]) { + alloc_reg_temp(current,i,-1); + minimum_free_regs[i]=1; + } current->is32|=1LL<is32&=~(1LL<regmap,rt1[i])<0) { // dummy load, but we still need a register to calculate the address alloc_reg_temp(current,i,-1); + minimum_free_regs[i]=1; } if(opcode[i]==0x27||opcode[i]==0x37) // LWU/LD { @@ -1616,6 +1619,7 @@ void load_alloc(struct regstat *current,int i) alloc_reg64(current,i,rt1[i]); alloc_all(current,i); alloc_reg64(current,i,FTEMP); + minimum_free_regs[i]=HOST_REGS; } else current->is32|=1LL<is32&=~(1LL<isconst=0; } @@ -1938,6 +1954,7 @@ static void pagespan_alloc(struct regstat *current,int i) current->isconst=0; current->wasconst=0; regs[i].wasconst=0; + minimum_free_regs[i]=HOST_REGS; alloc_all(current,i); alloc_cc(current,i); dirty_reg(current,CCREG); @@ -2770,8 +2787,10 @@ void load_assemble(int i,struct regstat *i_regs) if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<=0) { c=(i_regs->wasconst>>s)&1; - memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE; - if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1; + if (c) { + memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE; + if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1; + } } //printf("load_assemble: c=%d\n",c); //if(c) printf("load_assemble: const=%x\n",(int)constmap[i][s]+offset); @@ -3071,8 +3090,10 @@ void store_assemble(int i,struct regstat *i_regs) offset=imm[i]; if(s>=0) { c=(i_regs->wasconst>>s)&1; - memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE; - if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1; + if(c) { + memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE; + if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1; + } } assert(tl>=0); assert(temp>=0); @@ -3119,38 +3140,36 @@ void store_assemble(int i,struct regstat *i_regs) if (opcode[i]==0x28) { // SB if(!c||memtarget) { - int x=0; + int x=0,a=temp; #ifdef BIG_ENDIAN_MIPS if(!c) emit_xorimm(addr,3,temp); else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset); #else - if(c) x=(constmap[i][s]+offset)-(constmap[i][s]+offset); - else if (addr!=temp) emit_mov(addr,temp); + if(!c) a=addr; #endif //gen_tlb_addr_w(temp,map); //emit_writebyte_indexed(tl,(int)rdram-0x80000000,temp); - emit_writebyte_indexed_tlb(tl,x,temp,map,temp); + emit_writebyte_indexed_tlb(tl,x,a,map,a); } type=STOREB_STUB; } if (opcode[i]==0x29) { // SH if(!c||memtarget) { - int x=0; + int x=0,a=temp; #ifdef BIG_ENDIAN_MIPS if(!c) emit_xorimm(addr,2,temp); else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset); #else - if(c) x=(constmap[i][s]+offset)-(constmap[i][s]+offset); - else if (addr!=temp) emit_mov(addr,temp); + if(!c) a=addr; #endif //#ifdef //emit_writehword_indexed_tlb(tl,x,temp,map,temp); //#else if(map>=0) { - gen_tlb_addr_w(temp,map); - emit_writehword_indexed(tl,x,temp); + gen_tlb_addr_w(a,map); + emit_writehword_indexed(tl,x,a); }else - emit_writehword_indexed(tl,(int)rdram-0x80000000+x,temp); + emit_writehword_indexed(tl,(int)rdram-0x80000000+x,a); } type=STOREH_STUB; } @@ -3176,14 +3195,6 @@ void store_assemble(int i,struct regstat *i_regs) } type=STORED_STUB; } - if(!using_tlb&&(!c||memtarget)) - // addr could be a temp, make sure it survives STORE*_STUB - reglist|=1<regmap,rs2[i],ccadj[i],reglist); - } if(!using_tlb) { if(!c||memtarget) { #ifdef DESTRUCTIVE_SHIFT @@ -3198,11 +3209,20 @@ void store_assemble(int i,struct regstat *i_regs) #else emit_cmpmem_indexedsr12_imm((int)invalid_code,addr,1); #endif + #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT) + emit_callne(invalidate_addr_reg[addr]); + #else jaddr2=(int)out; emit_jne(0); add_stub(INVCODE_STUB,jaddr2,(int)out,reglist|(1<regmap,rs2[i],ccadj[i],reglist); + } //if(opcode[i]==0x2B || opcode[i]==0x3F) //if(opcode[i]==0x2B || opcode[i]==0x28) //if(opcode[i]==0x2B || opcode[i]==0x29) @@ -3243,7 +3263,7 @@ void storelr_assemble(int i,struct regstat *i_regs) int jaddr=0,jaddr2; int case1,case2,case3; int done0,done1,done2; - int memtarget,c=0; + int memtarget=0,c=0; int agr=AGEN1+(i&1); u_int hr,reglist=0; th=get_reg(i_regs->regmap,rs2[i]|64); @@ -3254,8 +3274,10 @@ void storelr_assemble(int i,struct regstat *i_regs) offset=imm[i]; if(s>=0) { c=(i_regs->isconst>>s)&1; - memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE; - if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1; + if(c) { + memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE; + if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1; + } } assert(tl>=0); for(hr=0;hr>2) assem_debug("idle loop\n"); - if(likely[i]) ooo=0; if(!match) invert=1; #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK if(i>(ba[i]-start)>>2) invert=1; #endif - - if(ooo) - if((rs1[i]&&(rs1[i]==rt1[i+1]||rs1[i]==rt2[i+1]))|| - (rs2[i]&&(rs2[i]==rt1[i+1]||rs2[i]==rt2[i+1]))) - { - // Write-after-read dependency prevents out of order execution - // First test branch condition, then execute delay slot, then branch - ooo=0; - } - - if(ooo) { + + if(ooo[i]) { s1l=get_reg(branch_regs[i].regmap,rs1[i]); s1h=get_reg(branch_regs[i].regmap,rs1[i]|64); s2l=get_reg(branch_regs[i].regmap,rs2[i]); @@ -5358,7 +5377,7 @@ void cjump_assemble(int i,struct regstat *i_regs) only32=(regs[i].was32>>rs1[i])&(regs[i].was32>>rs2[i])&1; } - if(ooo) { + if(ooo[i]) { // Out of order execution (delay slot first) //printf("OOOE\n"); address_generation(i+1,i_regs,regs[i].regmap_entry); @@ -5697,11 +5716,9 @@ void sjump_assemble(int i,struct regstat *i_regs) int prev_cop1_usable=cop1_usable; int unconditional=0,nevertaken=0; int only32=0; - int ooo=1; int invert=0; int internal=internal_branch(branch_regs[i].is32,ba[i]); if(i==(ba[i]-start)>>2) assem_debug("idle loop\n"); - if(likely[i]) ooo=0; if(!match) invert=1; #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK if(i>(ba[i]-start)>>2) invert=1; @@ -5710,19 +5727,7 @@ void sjump_assemble(int i,struct regstat *i_regs) //if(opcode2[i]>=0x10) return; // FIXME (BxxZAL) //assert(opcode2[i]<0x10||rs1[i]==0); // FIXME (BxxZAL) - if(ooo) { - if(rs1[i]&&(rs1[i]==rt1[i+1]||rs1[i]==rt2[i+1])) - { - // Write-after-read dependency prevents out of order execution - // First test branch condition, then execute delay slot, then branch - ooo=0; - } - if(rt1[i]==31&&(rs1[i+1]==31||rs2[i+1]==31||rt1[i+1]==31||rt2[i+1]==31)) - // BxxZAL $ra is available to delay insn, so do it in order - ooo=0; - } - - if(ooo) { + if(ooo[i]) { s1l=get_reg(branch_regs[i].regmap,rs1[i]); s1h=get_reg(branch_regs[i].regmap,rs1[i]|64); } @@ -5744,7 +5749,7 @@ void sjump_assemble(int i,struct regstat *i_regs) only32=(regs[i].was32>>rs1[i])&1; } - if(ooo) { + if(ooo[i]) { // Out of order execution (delay slot first) //printf("OOOE\n"); address_generation(i+1,i_regs,regs[i].regmap_entry); @@ -6037,25 +6042,15 @@ void fjump_assemble(int i,struct regstat *i_regs) assem_debug("fmatch=%d\n",match); int fs,cs; int eaddr; - int ooo=1; int invert=0; int internal=internal_branch(branch_regs[i].is32,ba[i]); if(i==(ba[i]-start)>>2) assem_debug("idle loop\n"); - if(likely[i]) ooo=0; if(!match) invert=1; #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK if(i>(ba[i]-start)>>2) invert=1; #endif - if(ooo) - if(itype[i+1]==FCOMP) - { - // Write-after-read dependency prevents out of order execution - // First test branch condition, then execute delay slot, then branch - ooo=0; - } - - if(ooo) { + if(ooo[i]) { fs=get_reg(branch_regs[i].regmap,FSREG); address_generation(i+1,i_regs,regs[i].regmap_entry); // Is this okay? } @@ -6074,7 +6069,7 @@ void fjump_assemble(int i,struct regstat *i_regs) cop1_usable=1; } - if(ooo) { + if(ooo[i]) { // Out of order execution (delay slot first) //printf("OOOE\n"); ds_assemble(i+1,i_regs); @@ -7725,20 +7720,10 @@ void disassemble_inst(int i) } } -void new_dynarec_init() +// clear the state completely, instead of just marking +// things invalid like invalidate_all_pages() does +void new_dynarec_clear_full() { - printf("Init new dynarec\n"); - out=(u_char *)BASE_ADDR; - if (mmap (out, 1<>2; for(n=526336;n<1048576;n++) // 0x80800000 .. 0xFFFFFFFF memory_map[n]=-1; + for(n=0;n<4096;n++) ll_clear(jump_in+n); + for(n=0;n<4096;n++) ll_clear(jump_out+n); + for(n=0;n<4096;n++) ll_clear(jump_dirty+n); +} + +void new_dynarec_init() +{ + printf("Init new dynarec\n"); + out=(u_char *)BASE_ADDR; + if (mmap (out, 1<>26; switch(op) { @@ -8821,6 +8831,7 @@ int new_recompile_block(int addr) #endif //current.is32|=1LL<clean transition - // #ifdef DESTRUCTIVE_WRITEBACK here? + #ifdef DESTRUCTIVE_WRITEBACK if(t>0) if(get_reg(regmap_pre[t],f_regmap[hr])>=0) if((regs[t].wasdirty>>get_reg(regmap_pre[t],f_regmap[hr]))&1) f_regmap[hr]=-1; + #endif + // This check is only strictly required in the DESTRUCTIVE_WRITEBACK + // case above, however it's always a good idea. We can't hoist the + // load if the register was already allocated, so there's no point + // wasting time analyzing most of these cases. It only "succeeds" + // when the mapping was different and the load can be replaced with + // a mov, which is of negligible benefit. So such cases are + // skipped below. if(f_regmap[hr]>0) { - if(regs[t].regmap_entry[hr]<0) { + if(regs[t].regmap_entry[hr]<0&&get_reg(regmap_pre[t],f_regmap[hr])<0) { int r=f_regmap[hr]; for(j=t;j<=i;j++) { @@ -9868,6 +9895,7 @@ int new_recompile_block(int addr) // register is lower numbered than the lower-half // register. Not sure if it's worth fixing... if(get_reg(regs[j].regmap,r&63)<0) break; + if(get_reg(regs[j].regmap_entry,r&63)<0) break; if(regs[j].is32&(1LL<<(r&63))) break; } if(regs[j].regmap[hr]==f_regmap[hr]&&(f_regmap[hr]&63)1&®s[k-1].regmap[hr]==-1) { - if(itype[k-1]==STORE||itype[k-1]==STORELR - ||itype[k-1]==C1LS||itype[k-1]==SHIFT||itype[k-1]==COP1 - ||itype[k-1]==FLOAT||itype[k-1]==FCONV||itype[k-1]==FCOMP - ||itype[k-1]==COP2||itype[k-1]==C2LS||itype[k-1]==C2OP) { - if(count_free_regs(regs[k-1].regmap)<2) { - //printf("no free regs for store %x\n",start+(k-1)*4); - break; - } + if(count_free_regs(regs[k-1].regmap)<=minimum_free_regs[k-1]) { + //printf("no free regs for store %x\n",start+(k-1)*4); + break; } - else - if(itype[k-1]!=NOP&&itype[k-1]!=MOV&&itype[k-1]!=ALU&&itype[k-1]!=SHIFTIMM&&itype[k-1]!=IMM16&&itype[k-1]!=LOAD) break; if(get_reg(regs[k-1].regmap,f_regmap[hr])>=0) { //printf("no-match due to different register\n"); break; @@ -9968,13 +9989,31 @@ int new_recompile_block(int addr) } } for(k=t;k>16)!=0x1000) { + regmap_pre[k+2][hr]=f_regmap[hr]; + regs[k+2].wasdirty&=~(1<>16)==0x1000) + { + // Stop on unconditional branch + break; + } + if(itype[j]==CJUMP||itype[j]==SJUMP||itype[j]==FJUMP) + { + if(ooo[j]) { + if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j+1]) + break; + }else{ + if(count_free_regs(branch_regs[j].regmap)<=minimum_free_regs[j+1]) + break; + } + if(get_reg(branch_regs[j].regmap,f_regmap[hr])>=0) { + //printf("no-match due to different register (branch)\n"); break; } } - else if(itype[j]!=NOP&&itype[j]!=MOV&&itype[j]!=ALU&&itype[j]!=SHIFTIMM&&itype[j]!=IMM16&&itype[j]!=LOAD) break; + if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j]) { + //printf("No free regs for store %x\n",start+j*4); + break; + } if(f_regmap[hr]>=64) { if(regs[j].is32&(1LL<<(f_regmap[hr]&63))) { break; @@ -10045,17 +10097,10 @@ int new_recompile_block(int addr) if(bt[i]) { for(j=i;j>12)) + for(i=start>>12;i<=(start+slen*4)>>12;i++) + invalid_code[((u_int)0x80000000>>12)|i]=0; +#endif /* Pass 10 - Free memory by expiring oldest blocks */ @@ -11041,6 +11084,10 @@ int new_recompile_block(int addr) break; case 3: // Clear jump_out + #ifdef __arm__ + if((expirep&2047)==0) + do_clear_cache(); + #endif ll_remove_matching_addrs(jump_out+(expirep&2047),base,shift); ll_remove_matching_addrs(jump_out+2048+(expirep&2047),base,shift); break;