X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=blobdiff_plain;f=libpcsxcore%2Fnew_dynarec%2Fnew_dynarec.c;h=6501d26a3fc379ae5a7303de263bf966c57fafd9;hp=3d6a2fef38a7dc70beebb4c60fdbc8fed2ea64a5;hb=e2b5e7aa45f75cd13ef238fa4ff9516891dabef5;hpb=1edfcc68047e356a9c57c4734cc3bbe084922ce7 diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 3d6a2fef..6501d26a 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -83,22 +83,29 @@ struct ll_entry struct ll_entry *next; }; - u_int start; - u_int *source; - char insn[MAXBLOCK][10]; - u_char itype[MAXBLOCK]; - u_char opcode[MAXBLOCK]; - u_char opcode2[MAXBLOCK]; - u_char bt[MAXBLOCK]; - u_char rs1[MAXBLOCK]; - u_char rs2[MAXBLOCK]; - u_char rt1[MAXBLOCK]; - u_char rt2[MAXBLOCK]; - u_char us1[MAXBLOCK]; - u_char us2[MAXBLOCK]; - u_char dep1[MAXBLOCK]; - u_char dep2[MAXBLOCK]; - u_char lt1[MAXBLOCK]; + // used by asm: + u_char *out; + u_int hash_table[65536][4] __attribute__((aligned(16))); + struct ll_entry *jump_in[4096] __attribute__((aligned(16))); + struct ll_entry *jump_dirty[4096]; + + static struct ll_entry *jump_out[4096]; + static u_int start; + static u_int *source; + static char insn[MAXBLOCK][10]; + static u_char itype[MAXBLOCK]; + static u_char opcode[MAXBLOCK]; + static u_char opcode2[MAXBLOCK]; + static u_char bt[MAXBLOCK]; + static u_char rs1[MAXBLOCK]; + static u_char rs2[MAXBLOCK]; + static u_char rt1[MAXBLOCK]; + static u_char rt2[MAXBLOCK]; + static u_char us1[MAXBLOCK]; + static u_char us2[MAXBLOCK]; + static u_char dep1[MAXBLOCK]; + static u_char dep2[MAXBLOCK]; + static u_char lt1[MAXBLOCK]; static uint64_t gte_rs[MAXBLOCK]; // gte: 32 data and 32 ctl regs static uint64_t gte_rt[MAXBLOCK]; static uint64_t gte_unneeded[MAXBLOCK]; @@ -107,52 +114,47 @@ struct ll_entry static u_int smrv_weak; // same, but somewhat less likely static u_int smrv_strong_next; // same, but after current insn executes static u_int smrv_weak_next; - int imm[MAXBLOCK]; - u_int ba[MAXBLOCK]; - char likely[MAXBLOCK]; - char is_ds[MAXBLOCK]; - char ooo[MAXBLOCK]; - uint64_t unneeded_reg[MAXBLOCK]; - uint64_t unneeded_reg_upper[MAXBLOCK]; - uint64_t branch_unneeded_reg[MAXBLOCK]; - uint64_t branch_unneeded_reg_upper[MAXBLOCK]; - uint64_t pr32[MAXBLOCK]; - signed char regmap_pre[MAXBLOCK][HOST_REGS]; + static int imm[MAXBLOCK]; + static u_int ba[MAXBLOCK]; + static char likely[MAXBLOCK]; + static char is_ds[MAXBLOCK]; + static char ooo[MAXBLOCK]; + static uint64_t unneeded_reg[MAXBLOCK]; + static uint64_t unneeded_reg_upper[MAXBLOCK]; + static uint64_t branch_unneeded_reg[MAXBLOCK]; + static uint64_t branch_unneeded_reg_upper[MAXBLOCK]; + static signed char regmap_pre[MAXBLOCK][HOST_REGS]; static uint64_t current_constmap[HOST_REGS]; static uint64_t constmap[MAXBLOCK][HOST_REGS]; static struct regstat regs[MAXBLOCK]; static struct regstat branch_regs[MAXBLOCK]; - signed char minimum_free_regs[MAXBLOCK]; - u_int needed_reg[MAXBLOCK]; - u_int wont_dirty[MAXBLOCK]; - u_int will_dirty[MAXBLOCK]; - int ccadj[MAXBLOCK]; - int slen; - u_int instr_addr[MAXBLOCK]; - u_int link_addr[MAXBLOCK][3]; - int linkcount; - u_int stubs[MAXBLOCK*3][8]; - int stubcount; - u_int literals[1024][2]; - int literalcount; - int is_delayslot; - int cop1_usable; - u_char *out; - struct ll_entry *jump_in[4096] __attribute__((aligned(16))); - struct ll_entry *jump_out[4096]; - struct ll_entry *jump_dirty[4096]; - u_int hash_table[65536][4] __attribute__((aligned(16))); - char shadow[1048576] __attribute__((aligned(16))); - void *copy; - int expirep; - int new_dynarec_did_compile; - int new_dynarec_hacks; - u_int stop_after_jal; + static signed char minimum_free_regs[MAXBLOCK]; + static u_int needed_reg[MAXBLOCK]; + static u_int wont_dirty[MAXBLOCK]; + static u_int will_dirty[MAXBLOCK]; + static int ccadj[MAXBLOCK]; + static int slen; + static u_int instr_addr[MAXBLOCK]; + static u_int link_addr[MAXBLOCK][3]; + static int linkcount; + static u_int stubs[MAXBLOCK*3][8]; + static int stubcount; + static u_int literals[1024][2]; + static int literalcount; + static int is_delayslot; + static int cop1_usable; + static char shadow[1048576] __attribute__((aligned(16))); + static void *copy; + static int expirep; + static u_int stop_after_jal; #ifndef RAM_FIXED static u_int ram_offset; #else static const u_int ram_offset=0; #endif + + int new_dynarec_hacks; + int new_dynarec_did_compile; extern u_char restore_candidate[512]; extern int cycle_count; @@ -186,7 +188,7 @@ struct ll_entry #define STORE 2 // Store #define LOADLR 3 // Unaligned load #define STORELR 4 // Unaligned store -#define MOV 5 // Move +#define MOV 5 // Move #define ALU 6 // Arithmetic/logic #define MULTDIV 7 // Multiply/divide #define SHIFT 8 // Shift by register @@ -254,15 +256,20 @@ void jump_intcall(); void new_dyna_leave(); // Needed by assembler -void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32); -void wb_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty); -void wb_needed_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr); -void load_all_regs(signed char i_regmap[]); -void load_needed_regs(signed char i_regmap[],signed char next_regmap[]); -void load_regs_entry(int t); -void load_all_consts(signed char regmap[],int is32,u_int dirty,int i); +static void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32); +static void wb_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty); +static void wb_needed_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr); +static void load_all_regs(signed char i_regmap[]); +static void load_needed_regs(signed char i_regmap[],signed char next_regmap[]); +static void load_regs_entry(int t); +static void load_all_consts(signed char regmap[],int is32,u_int dirty,int i); + +static int verify_dirty(u_int *ptr); +static int get_final_value(int hr, int i, int *value); +static void add_stub(int type,int addr,int retaddr,int a,int b,int c,int d,int e); +static void add_to_linker(int addr,int target,int ext); -int tracedebug=0; +static int tracedebug=0; //#define DEBUG_CYCLE_COUNT 1 @@ -421,7 +428,7 @@ static void flush_dirty_uppers(struct regstat *cur) for (hr=0;hrdirty>>hr)&1) { reg=cur->regmap[hr]; - if(reg>=64) + if(reg>=64) if((cur->is32>>(reg&63))&1) cur->regmap[hr]=-1; } } @@ -574,7 +581,7 @@ int needed_again(int r, int i) int j; int b=-1; int rn=10; - + if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000)) { if(ba[i-1]start+slen*4-4) @@ -677,7 +684,7 @@ int loop_reg(int i, int r, int hr) void alloc_all(struct regstat *cur,int i) { int hr; - + for(hr=0;hrregmap[hr]&63)!=rs1[i])&&((cur->regmap[hr]&63)!=rs2[i])&& @@ -789,7 +796,7 @@ void ll_remove_matching_addrs(struct ll_entry **head,int addr,int shift) { struct ll_entry *next; while(*head) { - if(((u_int)((*head)->addr)>>shift)==(addr>>shift) || + if(((u_int)((*head)->addr)>>shift)==(addr>>shift) || ((u_int)((*head)->addr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift)) { inv_debug("EXP: Remove pointer to %x (%x)\n",(int)(*head)->addr,(*head)->vaddr); @@ -885,7 +892,7 @@ static void invalidate_block_range(u_int block, u_int first, u_int last) #ifdef __arm__ do_clear_cache(); #endif - + // Don't trap writes invalid_code[block]=1; @@ -1747,7 +1754,7 @@ static void pagespan_alloc(struct regstat *current,int i) //else ... } -add_stub(int type,int addr,int retaddr,int a,int b,int c,int d,int e) +static void add_stub(int type,int addr,int retaddr,int a,int b,int c,int d,int e) { stubs[stubcount][0]=type; stubs[stubcount][1]=addr; @@ -3028,7 +3035,7 @@ void storelr_assemble(int i,struct regstat *i_regs) int map=get_reg(i_regs->regmap,ROREG); if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG); #else - if((u_int)rdram!=0x80000000) + if((u_int)rdram!=0x80000000) emit_addimm_no_flags((u_int)rdram-(u_int)0x80000000,temp); #endif @@ -3600,7 +3607,7 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) int agr=AGEN1+(i&1); if(itype[i]==LOAD) { ra=get_reg(i_regs->regmap,rt1[i]); - if(ra<0) ra=get_reg(i_regs->regmap,-1); + if(ra<0) ra=get_reg(i_regs->regmap,-1); assert(ra>=0); } if(itype[i]==LOADLR) { @@ -3703,7 +3710,7 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) } } -int get_final_value(int hr, int i, int *value) +static int get_final_value(int hr, int i, int *value) { int reg=regs[i].regmap[hr]; while(i>hr)&1) { if(i_regmap[hr]>s1l)&(branch_regs[i].is32>>rs1[i])&1) emit_loadreg(rs1[i],s1l); - } + } else { if((branch_regs[i].dirty>>s1l)&(branch_regs[i].is32>>rs2[i])&1) emit_loadreg(rs2[i],s1l); @@ -4554,7 +4561,7 @@ void do_ccstub(int n) load_all_regs(branch_regs[i].regmap); } emit_jmp(stubs[n][2]); // return address - + /* This works but uses a lot of memory... emit_readword((int)&last_count,ECX); emit_add(HOST_CCREG,ECX,EAX); @@ -4584,11 +4591,11 @@ void do_ccstub(int n) emit_jmpreg(EAX);*/ } -add_to_linker(int addr,int target,int ext) +static void add_to_linker(int addr,int target,int ext) { link_addr[linkcount][0]=addr; link_addr[linkcount][1]=target; - link_addr[linkcount][2]=ext; + link_addr[linkcount][2]=ext; linkcount++; } @@ -4614,7 +4621,7 @@ static void ujump_assemble_write_ra(int i) #endif { #ifdef REG_PREFETCH - if(temp>=0) + if(temp>=0) { if(i_regmap[temp]!=PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp); } @@ -4635,10 +4642,10 @@ void ujump_assemble(int i,struct regstat *i_regs) address_generation(i+1,i_regs,regs[i].regmap_entry); #ifdef REG_PREFETCH int temp=get_reg(branch_regs[i].regmap,PTEMP); - if(rt1[i]==31&&temp>=0) + if(rt1[i]==31&&temp>=0) { int return_address=start+i*4+8; - if(get_reg(branch_regs[i].regmap,31)>0) + if(get_reg(branch_regs[i].regmap,31)>0) if(i_regmap[temp]==PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp); } #endif @@ -4689,7 +4696,7 @@ static void rjump_assemble_write_ra(int i) assert(rt>=0); return_address=start+i*4+8; #ifdef REG_PREFETCH - if(temp>=0) + if(temp>=0) { if(i_regmap[temp]!=PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp); } @@ -4718,7 +4725,7 @@ void rjump_assemble(int i,struct regstat *i_regs) } address_generation(i+1,i_regs,regs[i].regmap_entry); #ifdef REG_PREFETCH - if(rt1[i]==31) + if(rt1[i]==31) { if((temp=get_reg(branch_regs[i].regmap,PTEMP))>=0) { int return_address=start+i*4+8; @@ -4850,7 +4857,7 @@ void cjump_assemble(int i,struct regstat *i_regs) #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK if(i>(ba[i]-start)>>2) invert=1; #endif - + if(ooo[i]) { s1l=get_reg(branch_regs[i].regmap,rs1[i]); s1h=get_reg(branch_regs[i].regmap,rs1[i]|64); @@ -4905,7 +4912,7 @@ void cjump_assemble(int i,struct regstat *i_regs) load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG); cc=get_reg(branch_regs[i].regmap,CCREG); assert(cc==HOST_CCREG); - if(unconditional) + if(unconditional) store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); //do_cc(i,branch_regs[i].regmap,&adj,unconditional?ba[i]:-1,unconditional); //assem_debug("cycle count (adj)\n"); @@ -4977,7 +4984,7 @@ void cjump_assemble(int i,struct regstat *i_regs) emit_jne(0); } } // if(!only32) - + //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); assert(s1l>=0); if(opcode[i]==4) // BEQ @@ -5107,7 +5114,7 @@ void cjump_assemble(int i,struct regstat *i_regs) emit_jne(1); } } // if(!only32) - + //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); assert(s1l>=0); if((opcode[i]&0x2f)==4) // BEQ @@ -5290,7 +5297,7 @@ void sjump_assemble(int i,struct regstat *i_regs) } cc=get_reg(branch_regs[i].regmap,CCREG); assert(cc==HOST_CCREG); - if(unconditional) + if(unconditional) store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); //do_cc(i,branch_regs[i].regmap,&adj,unconditional?ba[i]:-1,unconditional); assem_debug("cycle count (adj)\n"); @@ -5377,7 +5384,7 @@ void sjump_assemble(int i,struct regstat *i_regs) } } } // if(!only32) - + if(invert) { #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK if(match&&(!internal||!is_ds[(ba[i]-start)>>2])) { @@ -5625,7 +5632,7 @@ void fjump_assemble(int i,struct regstat *i_regs) { } } // if(!only32) - + if(invert) { if(adj) emit_addimm(cc,-CLOCK_ADJUST(adj),cc); #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK @@ -6133,14 +6140,14 @@ void unneeded_registers(int istart,int iend,int r) { // If subroutine call, flag return address as a possible branch target if(rt1[i]==31 && i=(start+slen*4)) { // Branch out of this block, flush all regs u=1; uu=1; gte_u=gte_u_unknown; - /* Hexagon hack + /* Hexagon hack if(itype[i]==UJUMP&&rt1[i]==31) { uu=u=0x300C00F; // Discard at, v0-v1, t6-t9 @@ -6730,7 +6737,7 @@ void clean_registers(int istart,int iend,int wr) if((regs[i].regmap[r]&63)==rt2[i]) wont_dirty_i|=1<istart) { - if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP&&itype[i]!=FJUMP) + if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP&&itype[i]!=FJUMP) { // Don't store a register immediately after writing it, // may prevent dual-issue. @@ -7140,7 +7147,7 @@ int new_recompile_block(int addr) assem_debug("NOTCOMPILED: addr = %x -> %x\n", (int)addr, (int)out); //printf("NOTCOMPILED: addr = %x -> %x\n", (int)addr, (int)out); //printf("TRACE: count=%d next=%d (compile %x)\n",Count,next_interupt,addr); - //if(debug) + //if(debug) //printf("TRACE: count=%d next=%d (checksum %x)\n",Count,next_interupt,mchecksum()); //printf("fpu mapping=%x enabled=%x\n",(Status & 0x04000000)>>26,(Status & 0x20000000)>>29); /*if(Count>=312978186) { @@ -7196,7 +7203,7 @@ int new_recompile_block(int addr) unsigned int type,op,op2; //printf("addr = %x source = %x %x\n", addr,source,source[0]); - + /* Pass 1 disassembly */ for(i=0;!done;i++) { @@ -7849,7 +7856,7 @@ int new_recompile_block(int addr) /* Pass 2 - Register dependencies and branch targets */ unneeded_registers(0,slen-1,0); - + /* Pass 3 - Register allocation */ struct regstat current; // Current register allocations/status @@ -7876,7 +7883,7 @@ int new_recompile_block(int addr) unneeded_reg_upper[0]=1; current.regmap[HOST_BTREG]=BTREG; } - + for(i=0;i=0;i--) { int hr; @@ -8892,7 +8899,7 @@ int new_recompile_block(int addr) } // Save it needed_reg[i]=nr; - + // Deallocate unneeded registers for(hr=0;hr=start && ba[i]<(start+i*4)) + if(ba[i]>=start && ba[i]<(start+i*4)) if(itype[i+1]==NOP||itype[i+1]==MOV||itype[i+1]==ALU ||itype[i+1]==SHIFTIMM||itype[i+1]==IMM16||itype[i+1]==LOAD ||itype[i+1]==STORE||itype[i+1]==STORELR||itype[i+1]==C1LS @@ -9071,10 +9078,10 @@ int new_recompile_block(int addr) } } if(ooo[i]) { - if(count_free_regs(regs[i].regmap)<=minimum_free_regs[i+1]) + if(count_free_regs(regs[i].regmap)<=minimum_free_regs[i+1]) f_regmap[hr]=branch_regs[i].regmap[hr]; }else{ - if(count_free_regs(branch_regs[i].regmap)<=minimum_free_regs[i+1]) + if(count_free_regs(branch_regs[i].regmap)<=minimum_free_regs[i+1]) f_regmap[hr]=branch_regs[i].regmap[hr]; } // Avoid dirty->clean transition @@ -9244,10 +9251,10 @@ int new_recompile_block(int addr) if(itype[j]==CJUMP||itype[j]==SJUMP||itype[j]==FJUMP) { if(ooo[j]) { - if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j+1]) + if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j+1]) break; }else{ - if(count_free_regs(branch_regs[j].regmap)<=minimum_free_regs[j+1]) + if(count_free_regs(branch_regs[j].regmap)<=minimum_free_regs[j+1]) break; } if(get_reg(branch_regs[j].regmap,f_regmap[hr])>=0) { @@ -9320,7 +9327,7 @@ int new_recompile_block(int addr) regs[k].isconst&=~(1<i&&f_regmap[HOST_CCREG]==CCREG) @@ -9362,7 +9369,7 @@ int new_recompile_block(int addr) } } } - + // Cache memory offset or tlb map pointer if a register is available #ifndef HOST_IMM_ADDR32 #ifndef RAM_OFFSET @@ -9542,7 +9549,7 @@ int new_recompile_block(int addr) } } #endif - + // This allocates registers (if possible) one instruction prior // to use, which can avoid a load-use penalty on certain CPUs. for(i=0;i=0) { @@ -9677,7 +9684,7 @@ int new_recompile_block(int addr) } } if(itype[i+1]==LOAD||itype[i+1]==LOADLR||itype[i+1]==STORE||itype[i+1]==STORELR/*||itype[i+1]==C1LS||||itype[i+1]==C2LS*/) { - if(itype[i+1]==LOAD) + if(itype[i+1]==LOAD) hr=get_reg(regs[i+1].regmap,rt1[i+1]); if(itype[i+1]==LOADLR||(opcode[i+1]&0x3b)==0x31||(opcode[i+1]&0x3b)==0x32) // LWC1/LDC1, LWC2/LDC2 hr=get_reg(regs[i+1].regmap,FTEMP); @@ -9701,10 +9708,10 @@ int new_recompile_block(int addr) } } } - + /* Pass 6 - Optimize clean/dirty state */ clean_registers(0,slen-1,1); - + /* Pass 7 - Identify 32-bit registers */ for (i=slen-1;i>=0;i--) { @@ -10165,15 +10172,15 @@ int new_recompile_block(int addr) //printf("shadow buffer: %x-%x\n",(int)copy,(int)copy+slen*4); memcpy(copy,source,slen*4); copy+=slen*4; - + #ifdef __arm__ __clear_cache((void *)beginning,out); #endif - + // If we're within 256K of the end of the buffer, // start over from the beginning. (Is 256K enough?) if((u_int)out>(u_int)BASE_ADDR+(1<>12;i<=(start+slen*4)>>12;i++) { invalid_code[i]=0; @@ -10186,9 +10193,9 @@ int new_recompile_block(int addr) invalid_code[((u_int)0x00000000>>12)|(i&0x1ff)]= invalid_code[((u_int)0x80000000>>12)|(i&0x1ff)]= invalid_code[((u_int)0xa0000000>>12)|(i&0x1ff)]=0; - + /* Pass 10 - Free memory by expiring oldest blocks */ - + int end=((((int)out-(int)BASE_ADDR)>>(TARGET_SIZE_2-16))+16384)&65535; while(expirep!=end) { @@ -10230,7 +10237,7 @@ int new_recompile_block(int addr) case 3: // Clear jump_out #ifdef __arm__ - if((expirep&2047)==0) + if((expirep&2047)==0) do_clear_cache(); #endif ll_remove_matching_addrs(jump_out+(expirep&2047),base,shift);