X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=libpcsxcore%2Fnew_dynarec%2Fnew_dynarec.c;h=107a6304dbf5ab29931123068762d01919967d6a;hb=079ab0c6836e83bae50c55cad2baf733049f5136;hp=71fabb8bd383cfb13f67a55625817caf2e24c728;hpb=2330734fa3064bf3a159c3c56f9a2e005598360e;p=pcsx_rearmed.git diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 71fabb8b..107a6304 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -29,10 +29,6 @@ #ifdef _3DS #include <3ds_utils.h> #endif -#ifdef VITA -#include -static int sceBlock; -#endif #include "new_dynarec_config.h" #include "../psxhle.h" @@ -79,9 +75,17 @@ static int sceBlock; #define MAXBLOCK 4096 #define MAX_OUTPUT_BLOCK_SIZE 262144 +#ifdef VITA +// apparently Vita has a 16MB limit, so either we cut tc in half, +// or use this hack (it's a hack because tc size was designed to be power-of-2) +#define TC_REDUCE_BYTES 4096 +#else +#define TC_REDUCE_BYTES 0 +#endif + struct ndrc_mem { - u_char translation_cache[1 << TARGET_SIZE_2]; + u_char translation_cache[(1 << TARGET_SIZE_2) - TC_REDUCE_BYTES]; struct { struct tramp_insns ops[2048 / sizeof(struct tramp_insns)]; @@ -121,8 +125,8 @@ struct regstat uint64_t wasdirty; uint64_t dirty; uint64_t u; - u_int wasconst; - u_int isconst; + u_int wasconst; // before; for example 'lw r2, (r2)' wasconst is true + u_int isconst; // ... but isconst is false when r2 is known u_int loadedconst; // host regs that have constants loaded u_int waswritten; // MIPS regs that were used as store base before }; @@ -228,7 +232,7 @@ static struct decoded_insn static void *copy; static int expirep; static u_int stop_after_jal; - static u_int f1_hack; // 0 - off, ~0 - capture address, else addr + static u_int f1_hack; int new_dynarec_hacks; int new_dynarec_hacks_pergame; @@ -354,6 +358,14 @@ static void pass_args(int a0, int a1); static void emit_far_jump(const void *f); static void emit_far_call(const void *f); +#ifdef VITA +#include +static int sceBlock; +// note: this interacts with RetroArch's Vita bootstrap code: bootstrap/vita/sbrk.c +extern int getVMBlock(); +int _newlib_vm_size_user = sizeof(*ndrc); +#endif + static void mprotect_w_x(void *start, void *end, int is_x) { #ifdef NO_WRITE_EXEC @@ -468,12 +480,12 @@ static void do_clear_cache(void) int cycle_multiplier = CYCLE_MULT_DEFAULT; // 100 for 1.0 int cycle_multiplier_override; int cycle_multiplier_old; +static int cycle_multiplier_active; static int CLOCK_ADJUST(int x) { - int m = cycle_multiplier_override && cycle_multiplier == CYCLE_MULT_DEFAULT - ? cycle_multiplier_override : cycle_multiplier; - int s=(x>>31)|1; + int m = cycle_multiplier_active; + int s = (x >> 31) | 1; return (x * m + s * 50) / 100; } @@ -1616,9 +1628,8 @@ static void alloc_reg_temp(struct regstat *cur,int i,signed char reg) static void mov_alloc(struct regstat *current,int i) { if (dops[i].rs1 == HIREG || dops[i].rs1 == LOREG) { - // logically this is needed but just won't work, no idea why - //alloc_cc(current,i); // for stalls - //dirty_reg(current,CCREG); + alloc_cc(current,i); // for stalls + dirty_reg(current,CCREG); } // Note: Don't need to actually alloc the source registers @@ -4123,6 +4134,7 @@ static int assemble(int i, const struct regstat *i_regs, int ccadj_) case SPAN: pagespan_assemble(i, i_regs); break; + case NOP: case OTHER: case NI: // not handled, just skip @@ -6789,7 +6801,7 @@ static void new_dynarec_test(void) SysPrintf("linkage_arm* miscompilation/breakage detected.\n"); } - SysPrintf("testing if we can run recompiled code...\n"); + SysPrintf("testing if we can run recompiled code @%p...\n", out); ((volatile u_int *)out)[0]++; // make cache dirty for (i = 0; i < ARRAY_SIZE(ret); i++) { @@ -6827,6 +6839,7 @@ void new_dynarec_clear_full(void) literalcount=0; stop_after_jal=0; inv_code_start=inv_code_end=~0; + hack_addr=0; f1_hack=0; // TLB for(n=0;n<4096;n++) ll_clear(jump_in+n); @@ -6839,16 +6852,24 @@ void new_dynarec_clear_full(void) void new_dynarec_init(void) { - SysPrintf("Init new dynarec\n"); + SysPrintf("Init new dynarec, ndrc size %x\n", (int)sizeof(*ndrc)); +#ifdef _3DS + check_rosalina(); +#endif #ifdef BASE_ADDR_DYNAMIC #ifdef VITA - sceBlock = sceKernelAllocMemBlockForVM("code", 1 << TARGET_SIZE_2); - if (sceBlock < 0) - SysPrintf("sceKernelAllocMemBlockForVM failed\n"); + sceBlock = getVMBlock(); //sceKernelAllocMemBlockForVM("code", sizeof(*ndrc)); + if (sceBlock <= 0) + SysPrintf("sceKernelAllocMemBlockForVM failed: %x\n", sceBlock); int ret = sceKernelGetMemBlockBase(sceBlock, (void **)&ndrc); if (ret < 0) - SysPrintf("sceKernelGetMemBlockBase failed\n"); + SysPrintf("sceKernelGetMemBlockBase failed: %x\n", ret); + sceKernelOpenVMDomain(); + sceClibPrintf("translation_cache = 0x%08lx\n ", (long)ndrc->translation_cache); + #elif defined(_MSC_VER) + ndrc = VirtualAlloc(NULL, sizeof(*ndrc), MEM_COMMIT | MEM_RESERVE, + PAGE_EXECUTE_READWRITE); #else uintptr_t desired_addr = 0; #ifdef __ELF__ @@ -6866,7 +6887,8 @@ void new_dynarec_init(void) #else #ifndef NO_WRITE_EXEC // not all systems allow execute in data segment by default - if (mprotect(ndrc, sizeof(ndrc->translation_cache) + sizeof(ndrc->tramp.ops), + // size must be 4K aligned for 3DS? + if (mprotect(ndrc, sizeof(*ndrc), PROT_READ | PROT_WRITE | PROT_EXEC) != 0) SysPrintf("mprotect() failed: %s\n", strerror(errno)); #endif @@ -6890,8 +6912,9 @@ void new_dynarec_cleanup(void) int n; #ifdef BASE_ADDR_DYNAMIC #ifdef VITA - sceKernelFreeMemBlock(sceBlock); - sceBlock = -1; + // sceBlock is managed by retroarch's bootstrap code + //sceKernelFreeMemBlock(sceBlock); + //sceBlock = -1; #else if (munmap(ndrc, sizeof(*ndrc)) < 0) SysPrintf("munmap() failed\n"); @@ -6907,9 +6930,6 @@ void new_dynarec_cleanup(void) static u_int *get_source_start(u_int addr, u_int *limit) { - if (!HACK_ENABLED(NDHACK_OVERRIDE_CYCLE_M)) - cycle_multiplier_override = 0; - if (addr < 0x00200000 || (0xa0000000 <= addr && addr < 0xa0200000)) { @@ -6924,7 +6944,7 @@ static u_int *get_source_start(u_int addr, u_int *limit) // BIOS. The multiplier should be much higher as it's uncached 8bit mem, // but timings in PCSX are too tied to the interpreter's BIAS if (!HACK_ENABLED(NDHACK_OVERRIDE_CYCLE_M)) - cycle_multiplier_override = 200; + cycle_multiplier_active = 200; *limit = (addr & 0xfff00000) | 0x80000; return (u_int *)((u_char *)psxR + (addr&0x7ffff)); @@ -7036,6 +7056,43 @@ void new_dynarec_load_blocks(const void *save, int size) memcpy(&psxRegs.GPR, regs_save, sizeof(regs_save)); } +static int apply_hacks(void) +{ + int i; + if (HACK_ENABLED(NDHACK_NO_COMPAT_HACKS)) + return 0; + /* special hack(s) */ + for (i = 0; i < slen - 4; i++) + { + // lui a4, 0xf200; jal ; addu a0, 2; slti v0, 28224 + if (source[i] == 0x3c04f200 && dops[i+1].itype == UJUMP + && source[i+2] == 0x34840002 && dops[i+3].opcode == 0x0a + && imm[i+3] == 0x6e40 && dops[i+3].rs1 == 2) + { + SysPrintf("PE2 hack @%08x\n", start + (i+3)*4); + dops[i + 3].itype = NOP; + } + } + i = slen; + if (i > 10 && source[i-1] == 0 && source[i-2] == 0x03e00008 + && source[i-4] == 0x8fbf0018 && source[i-6] == 0x00c0f809 + && dops[i-7].itype == STORE) + { + i = i-8; + if (dops[i].itype == IMM16) + i--; + // swl r2, 15(r6); swr r2, 12(r6); sw r6, *; jalr r6 + if (dops[i].itype == STORELR && dops[i].rs1 == 6 + && dops[i-1].itype == STORELR && dops[i-1].rs1 == 6) + { + SysPrintf("F1 hack from %08x, old dst %08x\n", start, hack_addr); + f1_hack = 1; + return 1; + } + } + return 0; +} + int new_recompile_block(u_int addr) { u_int pagelimit = 0; @@ -7071,9 +7128,11 @@ int new_recompile_block(u_int addr) ll_add_flags(jump_in+page,start,state_rflags,(void *)beginning); return 0; } - else if (f1_hack == ~0u || (f1_hack != 0 && start == f1_hack)) { + else if (f1_hack && hack_addr == 0) { void *beginning = start_block(); u_int page = get_page(start); + emit_movimm(start, 0); + emit_writeword(0, &hack_addr); emit_readword(&psxRegs.GPR.n.sp, 0); emit_readptr(&mem_rtab, 1); emit_shrimm(0, 12, 2); @@ -7089,10 +7148,12 @@ int new_recompile_block(u_int addr) ll_add_flags(jump_in + page, start, state_rflags, beginning); SysPrintf("F1 hack to %08x\n", start); - f1_hack = start; return 0; } + cycle_multiplier_active = cycle_multiplier_override && cycle_multiplier == CYCLE_MULT_DEFAULT + ? cycle_multiplier_override : cycle_multiplier; + source = get_source_start(start, &pagelimit); if (source == NULL) { SysPrintf("Compile at bogus memory address: %08x\n", addr); @@ -7118,9 +7179,9 @@ int new_recompile_block(u_int addr) /* Pass 1 disassembly */ - for(i=0;!done;i++) { - dops[i].bt=0; - dops[i].ooo=0; + for (i = 0; !done; i++) + { + memset(&dops[i], 0, sizeof(dops[i])); op2=0; minimum_free_regs[i]=0; dops[i].opcode=op=source[i]>>26; @@ -7560,12 +7621,12 @@ int new_recompile_block(u_int addr) do_in_intrp=1; } } - if(do_in_intrp) { - dops[i-1].rs1=CCREG; - dops[i-1].rs2=dops[i-1].rt1=dops[i-1].rt2=0; - ba[i-1]=-1; - dops[i-1].itype=INTCALL; - done=2; + if (do_in_intrp) { + memset(&dops[i-1], 0, sizeof(dops[i-1])); + dops[i-1].itype = INTCALL; + dops[i-1].rs1 = CCREG; + ba[i-1] = -1; + done = 2; i--; // don't compile the DS } } @@ -7614,23 +7675,7 @@ int new_recompile_block(u_int addr) } assert(slen>0); - /* spacial hack(s) */ - if (i > 10 && source[i-1] == 0 && source[i-2] == 0x03e00008 - && source[i-4] == 0x8fbf0018 && source[i-6] == 0x00c0f809 - && dops[i-7].itype == STORE) - { - i = i-8; - if (dops[i].itype == IMM16) - i--; - // swl r2, 15(r6); swr r2, 12(r6); sw r6, *; jalr r6 - if (dops[i].itype == STORELR && dops[i].rs1 == 6 - && dops[i-1].itype == STORELR && dops[i-1].rs1 == 6) - { - SysPrintf("F1 hack from %08x\n", start); - if (f1_hack == 0) - f1_hack = ~0u; - } - } + int clear_hack_addr = apply_hacks(); /* Pass 2 - Register dependencies and branch targets */ @@ -7690,7 +7735,10 @@ int new_recompile_block(u_int addr) current.u=branch_unneeded_reg[i]&~((1LL<\n"); drc_dbg_emit_do_cmp(i, ccadj[i]); + if (clear_hack_addr) { + emit_movimm(0, 0); + emit_writeword(0, &hack_addr); + clear_hack_addr = 0; + } // load regs if(regs[i].regmap_entry[HOST_CCREG]==CCREG&®s[i].regmap[HOST_CCREG]!=CCREG) @@ -9196,7 +9250,7 @@ int new_recompile_block(u_int addr) load_regs(regs[i].regmap_entry,regs[i].regmap,dops[i+1].rs2,dops[i+1].rs2); } // TODO: if(is_ooo(i)) address_generation(i+1); - if (dops[i].itype == CJUMP) + if (!dops[i].is_jump || dops[i].itype == CJUMP) load_regs(regs[i].regmap_entry,regs[i].regmap,CCREG,CCREG); if (ram_offset && (dops[i].is_load || dops[i].is_store)) load_regs(regs[i].regmap_entry,regs[i].regmap,ROREG,ROREG);