X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=libpcsxcore%2Fnew_dynarec%2Fnew_dynarec.c;h=8ae158965f2818afa7630a06e3f203cbee3e257a;hb=670c0f224c8effa022c7504572f2e3951e61e7eb;hp=bcbc00952ce3b374bc39c02646a4c6fa014099f4;hpb=761fdd0a4ef77be843ff7a8fb10d33a4401965d5;p=pcsx_rearmed.git diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index bcbc0095..8ae15896 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -29,10 +29,6 @@ #ifdef _3DS #include <3ds_utils.h> #endif -#ifdef VITA -#include -static int sceBlock; -#endif #include "new_dynarec_config.h" #include "../psxhle.h" @@ -53,6 +49,7 @@ static int sceBlock; //#define DISASM //#define ASSEM_PRINT +//#define REG_ALLOC_PRINT #ifdef ASSEM_PRINT #define assem_debug printf @@ -79,9 +76,17 @@ static int sceBlock; #define MAXBLOCK 4096 #define MAX_OUTPUT_BLOCK_SIZE 262144 +#ifdef VITA +// apparently Vita has a 16MB limit, so either we cut tc in half, +// or use this hack (it's a hack because tc size was designed to be power-of-2) +#define TC_REDUCE_BYTES 4096 +#else +#define TC_REDUCE_BYTES 0 +#endif + struct ndrc_mem { - u_char translation_cache[1 << TARGET_SIZE_2]; + u_char translation_cache[(1 << TARGET_SIZE_2) - TC_REDUCE_BYTES]; struct { struct tramp_insns ops[2048 / sizeof(struct tramp_insns)]; @@ -114,9 +119,13 @@ enum stub_type { INVCODE_STUB = 14, }; +// regmap_pre[i] - regs before [i] insn starts; dirty things here that +// don't match .regmap will be written back +// [i].regmap_entry - regs that must be set up if someone jumps here +// [i].regmap - regs [i] insn will read/(over)write struct regstat { - signed char regmap_entry[HOST_REGS]; // pre-insn + loop preloaded regs? + signed char regmap_entry[HOST_REGS]; signed char regmap[HOST_REGS]; uint64_t wasdirty; uint64_t dirty; @@ -202,7 +211,7 @@ static struct decoded_insn static u_int ba[MAXBLOCK]; static uint64_t unneeded_reg[MAXBLOCK]; static uint64_t branch_unneeded_reg[MAXBLOCK]; - // pre-instruction [i], excluding loop-preload regs? + // see 'struct regstat' for a description static signed char regmap_pre[MAXBLOCK][HOST_REGS]; // contains 'real' consts at [i] insn, but may differ from what's actually // loaded in host reg as 'final' value is always loaded, see get_final_value() @@ -228,7 +237,7 @@ static struct decoded_insn static void *copy; static int expirep; static u_int stop_after_jal; - static u_int f1_hack; // 0 - off, ~0 - capture address, else addr + static u_int f1_hack; int new_dynarec_hacks; int new_dynarec_hacks_pergame; @@ -293,7 +302,7 @@ static struct decoded_insn //#define FLOAT 19 // Floating point unit //#define FCONV 20 // Convert integer to float //#define FCOMP 21 // Floating point compare (sets FSREG) -#define SYSCALL 22// SYSCALL +#define SYSCALL 22// SYSCALL,BREAK #define OTHER 23 // Other #define SPAN 24 // Branch/delay slot spans 2 pages #define NI 25 // Not implemented @@ -324,6 +333,10 @@ void verify_code_ds(); void cc_interrupt(); void fp_exception(); void fp_exception_ds(); +void jump_syscall (u_int u0, u_int u1, u_int pc); +void jump_syscall_ds(u_int u0, u_int u1, u_int pc); +void jump_break (u_int u0, u_int u1, u_int pc); +void jump_break_ds(u_int u0, u_int u1, u_int pc); void jump_to_new_pc(); void call_gteStall(); void new_dyna_leave(); @@ -354,6 +367,14 @@ static void pass_args(int a0, int a1); static void emit_far_jump(const void *f); static void emit_far_call(const void *f); +#ifdef VITA +#include +static int sceBlock; +// note: this interacts with RetroArch's Vita bootstrap code: bootstrap/vita/sbrk.c +extern int getVMBlock(); +int _newlib_vm_size_user = sizeof(*ndrc); +#endif + static void mprotect_w_x(void *start, void *end, int is_x) { #ifdef NO_WRITE_EXEC @@ -582,10 +603,9 @@ void *get_addr_ht(u_int vaddr) return get_addr(vaddr); } -void clear_all_regs(signed char regmap[]) +static void clear_all_regs(signed char regmap[]) { - int hr; - for (hr=0;hrregmap, CCREG); + assert(ccreg == HOST_CCREG); + emit_movimm(start + i*4, 2); // pc + emit_addimm(HOST_CCREG, ccadj_ + CLOCK_ADJUST(1), HOST_CCREG); + emit_far_jump(func); } static void hlecall_assemble(int i, const struct regstat *i_regs, int ccadj_) @@ -6113,8 +6144,21 @@ static void pagespan_ds() load_regs_bt(regs[0].regmap,regs[0].dirty,start+4); } +static void check_regmap(signed char *regmap) +{ +#ifndef NDEBUG + int i,j; + for (i = 0; i < HOST_REGS; i++) { + if (regmap[i] < 0) + continue; + for (j = i + 1; j < HOST_REGS; j++) + assert(regmap[i] != regmap[j]); + } +#endif +} + // Basic liveness analysis for MIPS registers -void unneeded_registers(int istart,int iend,int r) +static void unneeded_registers(int istart,int iend,int r) { int i; uint64_t u,gte_u,b,gte_b; @@ -6827,6 +6871,7 @@ void new_dynarec_clear_full(void) literalcount=0; stop_after_jal=0; inv_code_start=inv_code_end=~0; + hack_addr=0; f1_hack=0; // TLB for(n=0;n<4096;n++) ll_clear(jump_in+n); @@ -6839,7 +6884,7 @@ void new_dynarec_clear_full(void) void new_dynarec_init(void) { - SysPrintf("Init new dynarec\n"); + SysPrintf("Init new dynarec, ndrc size %x\n", (int)sizeof(*ndrc)); #ifdef _3DS check_rosalina(); @@ -6847,11 +6892,11 @@ void new_dynarec_init(void) #ifdef BASE_ADDR_DYNAMIC #ifdef VITA sceBlock = getVMBlock(); //sceKernelAllocMemBlockForVM("code", sizeof(*ndrc)); - if (sceBlock < 0) - SysPrintf("sceKernelAllocMemBlockForVM failed\n"); + if (sceBlock <= 0) + SysPrintf("sceKernelAllocMemBlockForVM failed: %x\n", sceBlock); int ret = sceKernelGetMemBlockBase(sceBlock, (void **)&ndrc); if (ret < 0) - SysPrintf("sceKernelGetMemBlockBase failed\n"); + SysPrintf("sceKernelGetMemBlockBase failed: %x\n", ret); sceKernelOpenVMDomain(); sceClibPrintf("translation_cache = 0x%08lx\n ", (long)ndrc->translation_cache); #elif defined(_MSC_VER) @@ -6899,8 +6944,9 @@ void new_dynarec_cleanup(void) int n; #ifdef BASE_ADDR_DYNAMIC #ifdef VITA - sceKernelFreeMemBlock(sceBlock); - sceBlock = -1; + // sceBlock is managed by retroarch's bootstrap code + //sceKernelFreeMemBlock(sceBlock); + //sceBlock = -1; #else if (munmap(ndrc, sizeof(*ndrc)) < 0) SysPrintf("munmap() failed\n"); @@ -7042,11 +7088,11 @@ void new_dynarec_load_blocks(const void *save, int size) memcpy(&psxRegs.GPR, regs_save, sizeof(regs_save)); } -static void apply_hacks(void) +static int apply_hacks(void) { int i; if (HACK_ENABLED(NDHACK_NO_COMPAT_HACKS)) - return; + return 0; /* special hack(s) */ for (i = 0; i < slen - 4; i++) { @@ -7071,11 +7117,12 @@ static void apply_hacks(void) if (dops[i].itype == STORELR && dops[i].rs1 == 6 && dops[i-1].itype == STORELR && dops[i-1].rs1 == 6) { - SysPrintf("F1 hack from %08x\n", start); - if (f1_hack == 0) - f1_hack = ~0u; + SysPrintf("F1 hack from %08x, old dst %08x\n", start, hack_addr); + f1_hack = 1; + return 1; } } + return 0; } int new_recompile_block(u_int addr) @@ -7113,9 +7160,11 @@ int new_recompile_block(u_int addr) ll_add_flags(jump_in+page,start,state_rflags,(void *)beginning); return 0; } - else if (f1_hack == ~0u || (f1_hack != 0 && start == f1_hack)) { + else if (f1_hack && hack_addr == 0) { void *beginning = start_block(); u_int page = get_page(start); + emit_movimm(start, 0); + emit_writeword(0, &hack_addr); emit_readword(&psxRegs.GPR.n.sp, 0); emit_readptr(&mem_rtab, 1); emit_shrimm(0, 12, 2); @@ -7131,7 +7180,6 @@ int new_recompile_block(u_int addr) ll_add_flags(jump_in + page, start, state_rflags, beginning); SysPrintf("F1 hack to %08x\n", start); - f1_hack = start; return 0; } @@ -7184,7 +7232,7 @@ int new_recompile_block(u_int addr) case 0x08: strcpy(insn[i],"JR"); type=RJUMP; break; case 0x09: strcpy(insn[i],"JALR"); type=RJUMP; break; case 0x0C: strcpy(insn[i],"SYSCALL"); type=SYSCALL; break; - case 0x0D: strcpy(insn[i],"BREAK"); type=OTHER; break; + case 0x0D: strcpy(insn[i],"BREAK"); type=SYSCALL; break; case 0x0F: strcpy(insn[i],"SYNC"); type=OTHER; break; case 0x10: strcpy(insn[i],"MFHI"); type=MOV; break; case 0x11: strcpy(insn[i],"MTHI"); type=MOV; break; @@ -7630,9 +7678,9 @@ int new_recompile_block(u_int addr) // Don't get too close to the limit if(i>MAXBLOCK/2) done=1; } - if(dops[i].itype==SYSCALL&&stop_after_jal) done=1; - if(dops[i].itype==HLECALL||dops[i].itype==INTCALL) done=2; - if(done==2) { + if (dops[i].itype == SYSCALL || dops[i].itype == HLECALL || dops[i].itype == INTCALL) + done = stop_after_jal ? 1 : 2; + if (done == 2) { // Does the block continue due to a branch? for(j=i-1;j>=0;j--) { @@ -7659,7 +7707,7 @@ int new_recompile_block(u_int addr) } assert(slen>0); - apply_hacks(); + int clear_hack_addr = apply_hacks(); /* Pass 2 - Register dependencies and branch targets */ @@ -7668,14 +7716,16 @@ int new_recompile_block(u_int addr) /* Pass 3 - Register allocation */ struct regstat current; // Current register allocations/status - current.dirty=0; - current.u=unneeded_reg[0]; + clear_all_regs(current.regmap_entry); clear_all_regs(current.regmap); - alloc_reg(¤t,0,CCREG); - dirty_reg(¤t,CCREG); - current.isconst=0; - current.wasconst=0; - current.waswritten=0; + current.wasdirty = current.dirty = 0; + current.u = unneeded_reg[0]; + alloc_reg(¤t, 0, CCREG); + dirty_reg(¤t, CCREG); + current.wasconst = 0; + current.isconst = 0; + current.loadedconst = 0; + current.waswritten = 0; int ds=0; int cc=0; int hr=-1; @@ -7706,6 +7756,9 @@ int new_recompile_block(u_int addr) memcpy(regmap_pre[i],current.regmap,sizeof(current.regmap)); regs[i].wasconst=current.isconst; regs[i].wasdirty=current.dirty; + regs[i].dirty=0; + regs[i].u=0; + regs[i].isconst=0; regs[i].loadedconst=0; if (!dops[i].is_jump) { if(i+1 %x, %x %d/%d\n",start+i*4,ba[i],start+j*4,hr,r); int k; if(regs[i].regmap[hr]==-1&&branch_regs[i].regmap[hr]==-1) { + if(get_reg(regs[i].regmap,f_regmap[hr])>=0) break; if(get_reg(regs[i+2].regmap,f_regmap[hr])>=0) break; - if(r>63) { - if(get_reg(regs[i].regmap,r&63)<0) break; - if(get_reg(branch_regs[i].regmap,r&63)<0) break; - } k=i; while(k>1&®s[k-1].regmap[hr]==-1) { if(count_free_regs(regs[k-1].regmap)<=minimum_free_regs[k-1]) { @@ -8629,7 +8683,6 @@ int new_recompile_block(u_int addr) if(k>2&&(dops[k-3].itype==UJUMP||dops[k-3].itype==RJUMP)&&dops[k-3].rt1==31) { break; } - assert(r < 64); k--; } if(regs[k-1].regmap[hr]==f_regmap[hr]&®map_pre[k][hr]==f_regmap[hr]) { @@ -8908,7 +8961,10 @@ int new_recompile_block(u_int addr) if(get_reg(regs[i+1].regmap,dops[i+1].rs1)<0) { hr=get_reg2(regs[i].regmap,regs[i+1].regmap,-1); if(hr<0) hr=get_reg(regs[i+1].regmap,-1); - else {regs[i+1].regmap[hr]=AGEN1+((i+1)&1);regs[i+1].isconst&=~(1<=0); if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0) { @@ -9005,7 +9061,7 @@ int new_recompile_block(u_int addr) dops[slen-1].bt=1; // Mark as a branch target so instruction can restart after exception } -#ifdef DISASM +#ifdef REG_ALLOC_PRINT /* Debug/disassembly */ for(i=0;i\n"); drc_dbg_emit_do_cmp(i, ccadj[i]); + if (clear_hack_addr) { + emit_movimm(0, 0); + emit_writeword(0, &hack_addr); + clear_hack_addr = 0; + } // load regs if(regs[i].regmap_entry[HOST_CCREG]==CCREG&®s[i].regmap[HOST_CCREG]!=CCREG)