X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=libpcsxcore%2Fnew_dynarec%2Fnew_dynarec.c;h=2ca0f60448ecbadf04cc170d7c09f9456ffa5da7;hb=d1150cd66676ce43b8451c65818c2dc3e2f8a1d6;hp=f1005db85b9c52319478cb16034e717b3563a1e2;hpb=9a3ccfeb31dad024ab5c249c870866570414b2ba;p=pcsx_rearmed.git diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index f1005db8..2ca0f604 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -29,10 +29,6 @@ #ifdef _3DS #include <3ds_utils.h> #endif -#ifdef VITA -#include -static int sceBlock; -#endif #include "new_dynarec_config.h" #include "../psxhle.h" @@ -53,6 +49,7 @@ static int sceBlock; //#define DISASM //#define ASSEM_PRINT +//#define REG_ALLOC_PRINT #ifdef ASSEM_PRINT #define assem_debug printf @@ -79,9 +76,17 @@ static int sceBlock; #define MAXBLOCK 4096 #define MAX_OUTPUT_BLOCK_SIZE 262144 +#ifdef VITA +// apparently Vita has a 16MB limit, so either we cut tc in half, +// or use this hack (it's a hack because tc size was designed to be power-of-2) +#define TC_REDUCE_BYTES 4096 +#else +#define TC_REDUCE_BYTES 0 +#endif + struct ndrc_mem { - u_char translation_cache[1 << TARGET_SIZE_2]; + u_char translation_cache[(1 << TARGET_SIZE_2) - TC_REDUCE_BYTES]; struct { struct tramp_insns ops[2048 / sizeof(struct tramp_insns)]; @@ -121,8 +126,8 @@ struct regstat uint64_t wasdirty; uint64_t dirty; uint64_t u; - u_int wasconst; - u_int isconst; + u_int wasconst; // before; for example 'lw r2, (r2)' wasconst is true + u_int isconst; // ... but isconst is false when r2 is known u_int loadedconst; // host regs that have constants loaded u_int waswritten; // MIPS regs that were used as store base before }; @@ -228,7 +233,7 @@ static struct decoded_insn static void *copy; static int expirep; static u_int stop_after_jal; - static u_int f1_hack; // 0 - off, ~0 - capture address, else addr + static u_int f1_hack; int new_dynarec_hacks; int new_dynarec_hacks_pergame; @@ -293,7 +298,7 @@ static struct decoded_insn //#define FLOAT 19 // Floating point unit //#define FCONV 20 // Convert integer to float //#define FCOMP 21 // Floating point compare (sets FSREG) -#define SYSCALL 22// SYSCALL +#define SYSCALL 22// SYSCALL,BREAK #define OTHER 23 // Other #define SPAN 24 // Branch/delay slot spans 2 pages #define NI 25 // Not implemented @@ -324,6 +329,10 @@ void verify_code_ds(); void cc_interrupt(); void fp_exception(); void fp_exception_ds(); +void jump_syscall (u_int u0, u_int u1, u_int pc); +void jump_syscall_ds(u_int u0, u_int u1, u_int pc); +void jump_break (u_int u0, u_int u1, u_int pc); +void jump_break_ds(u_int u0, u_int u1, u_int pc); void jump_to_new_pc(); void call_gteStall(); void new_dyna_leave(); @@ -354,6 +363,14 @@ static void pass_args(int a0, int a1); static void emit_far_jump(const void *f); static void emit_far_call(const void *f); +#ifdef VITA +#include +static int sceBlock; +// note: this interacts with RetroArch's Vita bootstrap code: bootstrap/vita/sbrk.c +extern int getVMBlock(); +int _newlib_vm_size_user = sizeof(*ndrc); +#endif + static void mprotect_w_x(void *start, void *end, int is_x) { #ifdef NO_WRITE_EXEC @@ -468,12 +485,12 @@ static void do_clear_cache(void) int cycle_multiplier = CYCLE_MULT_DEFAULT; // 100 for 1.0 int cycle_multiplier_override; int cycle_multiplier_old; +static int cycle_multiplier_active; static int CLOCK_ADJUST(int x) { - int m = cycle_multiplier_override && cycle_multiplier == CYCLE_MULT_DEFAULT - ? cycle_multiplier_override : cycle_multiplier; - int s=(x>>31)|1; + int m = cycle_multiplier_active; + int s = (x >> 31) | 1; return (x * m + s * 50) / 100; } @@ -914,6 +931,10 @@ static const struct { FUNCNAME(jump_handler_write32), FUNCNAME(invalidate_addr), FUNCNAME(jump_to_new_pc), + FUNCNAME(jump_break), + FUNCNAME(jump_break_ds), + FUNCNAME(jump_syscall), + FUNCNAME(jump_syscall_ds), FUNCNAME(call_gteStall), FUNCNAME(new_dyna_leave), FUNCNAME(pcsx_mtc0), @@ -3923,9 +3944,16 @@ static void call_c_cpu_handler(int i, const struct regstat *i_regs, int ccadj_, static void syscall_assemble(int i, const struct regstat *i_regs, int ccadj_) { - emit_movimm(0x20,0); // cause code - emit_movimm(0,1); // not in delay slot - call_c_cpu_handler(i, i_regs, ccadj_, start+i*4, psxException); + // 'break' tends to be littered around to catch things like + // division by 0 and is almost never executed, so don't emit much code here + void *func = (dops[i].opcode2 == 0x0C) + ? (is_delayslot ? jump_syscall_ds : jump_syscall) + : (is_delayslot ? jump_break_ds : jump_break); + signed char ccreg = get_reg(i_regs->regmap, CCREG); + assert(ccreg == HOST_CCREG); + emit_movimm(start + i*4, 2); // pc + emit_addimm(HOST_CCREG, ccadj_ + CLOCK_ADJUST(1), HOST_CCREG); + emit_far_jump(func); } static void hlecall_assemble(int i, const struct regstat *i_regs, int ccadj_) @@ -4122,6 +4150,7 @@ static int assemble(int i, const struct regstat *i_regs, int ccadj_) case SPAN: pagespan_assemble(i, i_regs); break; + case NOP: case OTHER: case NI: // not handled, just skip @@ -6788,7 +6817,7 @@ static void new_dynarec_test(void) SysPrintf("linkage_arm* miscompilation/breakage detected.\n"); } - SysPrintf("testing if we can run recompiled code...\n"); + SysPrintf("testing if we can run recompiled code @%p...\n", out); ((volatile u_int *)out)[0]++; // make cache dirty for (i = 0; i < ARRAY_SIZE(ret); i++) { @@ -6826,6 +6855,7 @@ void new_dynarec_clear_full(void) literalcount=0; stop_after_jal=0; inv_code_start=inv_code_end=~0; + hack_addr=0; f1_hack=0; // TLB for(n=0;n<4096;n++) ll_clear(jump_in+n); @@ -6838,16 +6868,24 @@ void new_dynarec_clear_full(void) void new_dynarec_init(void) { - SysPrintf("Init new dynarec\n"); + SysPrintf("Init new dynarec, ndrc size %x\n", (int)sizeof(*ndrc)); +#ifdef _3DS + check_rosalina(); +#endif #ifdef BASE_ADDR_DYNAMIC #ifdef VITA - sceBlock = sceKernelAllocMemBlockForVM("code", 1 << TARGET_SIZE_2); - if (sceBlock < 0) - SysPrintf("sceKernelAllocMemBlockForVM failed\n"); + sceBlock = getVMBlock(); //sceKernelAllocMemBlockForVM("code", sizeof(*ndrc)); + if (sceBlock <= 0) + SysPrintf("sceKernelAllocMemBlockForVM failed: %x\n", sceBlock); int ret = sceKernelGetMemBlockBase(sceBlock, (void **)&ndrc); if (ret < 0) - SysPrintf("sceKernelGetMemBlockBase failed\n"); + SysPrintf("sceKernelGetMemBlockBase failed: %x\n", ret); + sceKernelOpenVMDomain(); + sceClibPrintf("translation_cache = 0x%08lx\n ", (long)ndrc->translation_cache); + #elif defined(_MSC_VER) + ndrc = VirtualAlloc(NULL, sizeof(*ndrc), MEM_COMMIT | MEM_RESERVE, + PAGE_EXECUTE_READWRITE); #else uintptr_t desired_addr = 0; #ifdef __ELF__ @@ -6865,7 +6903,8 @@ void new_dynarec_init(void) #else #ifndef NO_WRITE_EXEC // not all systems allow execute in data segment by default - if (mprotect(ndrc, sizeof(ndrc->translation_cache) + sizeof(ndrc->tramp.ops), + // size must be 4K aligned for 3DS? + if (mprotect(ndrc, sizeof(*ndrc), PROT_READ | PROT_WRITE | PROT_EXEC) != 0) SysPrintf("mprotect() failed: %s\n", strerror(errno)); #endif @@ -6889,8 +6928,9 @@ void new_dynarec_cleanup(void) int n; #ifdef BASE_ADDR_DYNAMIC #ifdef VITA - sceKernelFreeMemBlock(sceBlock); - sceBlock = -1; + // sceBlock is managed by retroarch's bootstrap code + //sceKernelFreeMemBlock(sceBlock); + //sceBlock = -1; #else if (munmap(ndrc, sizeof(*ndrc)) < 0) SysPrintf("munmap() failed\n"); @@ -6906,9 +6946,6 @@ void new_dynarec_cleanup(void) static u_int *get_source_start(u_int addr, u_int *limit) { - if (!HACK_ENABLED(NDHACK_OVERRIDE_CYCLE_M)) - cycle_multiplier_override = 0; - if (addr < 0x00200000 || (0xa0000000 <= addr && addr < 0xa0200000)) { @@ -6923,7 +6960,7 @@ static u_int *get_source_start(u_int addr, u_int *limit) // BIOS. The multiplier should be much higher as it's uncached 8bit mem, // but timings in PCSX are too tied to the interpreter's BIAS if (!HACK_ENABLED(NDHACK_OVERRIDE_CYCLE_M)) - cycle_multiplier_override = 200; + cycle_multiplier_active = 200; *limit = (addr & 0xfff00000) | 0x80000; return (u_int *)((u_char *)psxR + (addr&0x7ffff)); @@ -7035,6 +7072,43 @@ void new_dynarec_load_blocks(const void *save, int size) memcpy(&psxRegs.GPR, regs_save, sizeof(regs_save)); } +static int apply_hacks(void) +{ + int i; + if (HACK_ENABLED(NDHACK_NO_COMPAT_HACKS)) + return 0; + /* special hack(s) */ + for (i = 0; i < slen - 4; i++) + { + // lui a4, 0xf200; jal ; addu a0, 2; slti v0, 28224 + if (source[i] == 0x3c04f200 && dops[i+1].itype == UJUMP + && source[i+2] == 0x34840002 && dops[i+3].opcode == 0x0a + && imm[i+3] == 0x6e40 && dops[i+3].rs1 == 2) + { + SysPrintf("PE2 hack @%08x\n", start + (i+3)*4); + dops[i + 3].itype = NOP; + } + } + i = slen; + if (i > 10 && source[i-1] == 0 && source[i-2] == 0x03e00008 + && source[i-4] == 0x8fbf0018 && source[i-6] == 0x00c0f809 + && dops[i-7].itype == STORE) + { + i = i-8; + if (dops[i].itype == IMM16) + i--; + // swl r2, 15(r6); swr r2, 12(r6); sw r6, *; jalr r6 + if (dops[i].itype == STORELR && dops[i].rs1 == 6 + && dops[i-1].itype == STORELR && dops[i-1].rs1 == 6) + { + SysPrintf("F1 hack from %08x, old dst %08x\n", start, hack_addr); + f1_hack = 1; + return 1; + } + } + return 0; +} + int new_recompile_block(u_int addr) { u_int pagelimit = 0; @@ -7070,9 +7144,11 @@ int new_recompile_block(u_int addr) ll_add_flags(jump_in+page,start,state_rflags,(void *)beginning); return 0; } - else if (f1_hack == ~0u || (f1_hack != 0 && start == f1_hack)) { + else if (f1_hack && hack_addr == 0) { void *beginning = start_block(); u_int page = get_page(start); + emit_movimm(start, 0); + emit_writeword(0, &hack_addr); emit_readword(&psxRegs.GPR.n.sp, 0); emit_readptr(&mem_rtab, 1); emit_shrimm(0, 12, 2); @@ -7088,10 +7164,12 @@ int new_recompile_block(u_int addr) ll_add_flags(jump_in + page, start, state_rflags, beginning); SysPrintf("F1 hack to %08x\n", start); - f1_hack = start; return 0; } + cycle_multiplier_active = cycle_multiplier_override && cycle_multiplier == CYCLE_MULT_DEFAULT + ? cycle_multiplier_override : cycle_multiplier; + source = get_source_start(start, &pagelimit); if (source == NULL) { SysPrintf("Compile at bogus memory address: %08x\n", addr); @@ -7117,9 +7195,9 @@ int new_recompile_block(u_int addr) /* Pass 1 disassembly */ - for(i=0;!done;i++) { - dops[i].bt=0; - dops[i].ooo=0; + for (i = 0; !done; i++) + { + memset(&dops[i], 0, sizeof(dops[i])); op2=0; minimum_free_regs[i]=0; dops[i].opcode=op=source[i]>>26; @@ -7138,7 +7216,7 @@ int new_recompile_block(u_int addr) case 0x08: strcpy(insn[i],"JR"); type=RJUMP; break; case 0x09: strcpy(insn[i],"JALR"); type=RJUMP; break; case 0x0C: strcpy(insn[i],"SYSCALL"); type=SYSCALL; break; - case 0x0D: strcpy(insn[i],"BREAK"); type=OTHER; break; + case 0x0D: strcpy(insn[i],"BREAK"); type=SYSCALL; break; case 0x0F: strcpy(insn[i],"SYNC"); type=OTHER; break; case 0x10: strcpy(insn[i],"MFHI"); type=MOV; break; case 0x11: strcpy(insn[i],"MTHI"); type=MOV; break; @@ -7559,12 +7637,12 @@ int new_recompile_block(u_int addr) do_in_intrp=1; } } - if(do_in_intrp) { - dops[i-1].rs1=CCREG; - dops[i-1].rs2=dops[i-1].rt1=dops[i-1].rt2=0; - ba[i-1]=-1; - dops[i-1].itype=INTCALL; - done=2; + if (do_in_intrp) { + memset(&dops[i-1], 0, sizeof(dops[i-1])); + dops[i-1].itype = INTCALL; + dops[i-1].rs1 = CCREG; + ba[i-1] = -1; + done = 2; i--; // don't compile the DS } } @@ -7584,9 +7662,9 @@ int new_recompile_block(u_int addr) // Don't get too close to the limit if(i>MAXBLOCK/2) done=1; } - if(dops[i].itype==SYSCALL&&stop_after_jal) done=1; - if(dops[i].itype==HLECALL||dops[i].itype==INTCALL) done=2; - if(done==2) { + if (dops[i].itype == SYSCALL || dops[i].itype == HLECALL || dops[i].itype == INTCALL) + done = stop_after_jal ? 1 : 2; + if (done == 2) { // Does the block continue due to a branch? for(j=i-1;j>=0;j--) { @@ -7613,23 +7691,7 @@ int new_recompile_block(u_int addr) } assert(slen>0); - /* spacial hack(s) */ - if (i > 10 && source[i-1] == 0 && source[i-2] == 0x03e00008 - && source[i-4] == 0x8fbf0018 && source[i-6] == 0x00c0f809 - && dops[i-7].itype == STORE) - { - i = i-8; - if (dops[i].itype == IMM16) - i--; - // swl r2, 15(r6); swr r2, 12(r6); sw r6, *; jalr r6 - if (dops[i].itype == STORELR && dops[i].rs1 == 6 - && dops[i-1].itype == STORELR && dops[i-1].rs1 == 6) - { - SysPrintf("F1 hack from %08x\n", start); - if (f1_hack == 0) - f1_hack = ~0u; - } - } + int clear_hack_addr = apply_hacks(); /* Pass 2 - Register dependencies and branch targets */ @@ -7689,7 +7751,10 @@ int new_recompile_block(u_int addr) current.u=branch_unneeded_reg[i]&~((1LL<\n"); drc_dbg_emit_do_cmp(i, ccadj[i]); + if (clear_hack_addr) { + emit_movimm(0, 0); + emit_writeword(0, &hack_addr); + clear_hack_addr = 0; + } // load regs if(regs[i].regmap_entry[HOST_CCREG]==CCREG&®s[i].regmap[HOST_CCREG]!=CCREG)