X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=blobdiff_plain;f=libpcsxcore%2Fnew_dynarec%2Fnew_dynarec.c;h=cd63d2bf218c52bd1a6544f81e6a0b2c9f43c061;hp=5de7b9278b8dcaeebb588bf9288e028a1a828c67;hb=HEAD;hpb=684b6816254a31c40b0d11269aebb2a87fd79b74 diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 5de7b927..b4295239 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -23,6 +23,7 @@ #include #include #include +#include #ifdef __MACH__ #include #endif @@ -37,6 +38,7 @@ static Jit g_jit; #include "new_dynarec_config.h" #include "../psxhle.h" #include "../psxinterpreter.h" +#include "../psxcounters.h" #include "../gte.h" #include "emu_if.h" // emulator interface #include "linkage_offsets.h" @@ -110,18 +112,16 @@ struct ndrc_mem struct ndrc_tramp tramp; }; -#ifdef BASE_ADDR_DYNAMIC static struct ndrc_mem *ndrc; -#else -static struct ndrc_mem ndrc_ __attribute__((aligned(4096))); -static struct ndrc_mem *ndrc = &ndrc_; +#ifndef BASE_ADDR_DYNAMIC +// reserve .bss space with upto 64k page size in mind +static char ndrc_bss[((sizeof(*ndrc) + 65535) & ~65535) + 65536]; #endif #ifdef TC_WRITE_OFFSET # ifdef __GLIBC__ # include # include # include -# include # endif static long ndrc_write_ofs; #define NDRC_WRITE_OFFSET(x) (void *)((char *)(x) + ndrc_write_ofs) @@ -647,6 +647,24 @@ static struct ht_entry *hash_table_get(u_int vaddr) return &hash_table[((vaddr>>16)^vaddr)&0xFFFF]; } +#define HASH_TABLE_BAD 0xbac + +static void hash_table_clear(void) +{ + struct ht_entry *ht_bin; + int i, j; + for (i = 0; i < ARRAY_SIZE(hash_table); i++) { + for (j = 0; j < ARRAY_SIZE(hash_table[i].vaddr); j++) { + hash_table[i].vaddr[j] = ~0; + hash_table[i].tcaddr[j] = (void *)(uintptr_t)HASH_TABLE_BAD; + } + } + // don't allow ~0 to hit + ht_bin = hash_table_get(~0); + for (j = 0; j < ARRAY_SIZE(ht_bin->vaddr); j++) + ht_bin->vaddr[j] = 1; +} + static void hash_table_add(u_int vaddr, void *tcaddr) { struct ht_entry *ht_bin = hash_table_get(vaddr); @@ -662,17 +680,30 @@ static void hash_table_remove(int vaddr) //printf("remove hash: %x\n",vaddr); struct ht_entry *ht_bin = hash_table_get(vaddr); if (ht_bin->vaddr[1] == vaddr) { - ht_bin->vaddr[1] = -1; - ht_bin->tcaddr[1] = NULL; + ht_bin->vaddr[1] = ~0; + ht_bin->tcaddr[1] = (void *)(uintptr_t)HASH_TABLE_BAD; } if (ht_bin->vaddr[0] == vaddr) { ht_bin->vaddr[0] = ht_bin->vaddr[1]; ht_bin->tcaddr[0] = ht_bin->tcaddr[1]; - ht_bin->vaddr[1] = -1; - ht_bin->tcaddr[1] = NULL; + ht_bin->vaddr[1] = ~0; + ht_bin->tcaddr[1] = (void *)(uintptr_t)HASH_TABLE_BAD; } } +static void mini_ht_clear(void) +{ +#ifdef USE_MINI_HT + int i; + for (i = 0; i < ARRAY_SIZE(mini_ht) - 1; i++) { + mini_ht[i][0] = ~0; + mini_ht[i][1] = HASH_TABLE_BAD; + } + mini_ht[i][0] = 1; + mini_ht[i][1] = HASH_TABLE_BAD; +#endif +} + static void mark_invalid_code(u_int vaddr, u_int len, char invalid) { u_int vaddr_m = vaddr & 0x1fffffff; @@ -1567,9 +1598,7 @@ static int invalidate_range(u_int start, u_int end, } if (hit) { do_clear_cache(); -#ifdef USE_MINI_HT - memset(mini_ht, -1, sizeof(mini_ht)); -#endif + mini_ht_clear(); } if (inv_start <= (start_m & ~0xfff) && inv_end >= (start_m | 0xfff)) @@ -1626,10 +1655,8 @@ void new_dynarec_invalidate_all_pages(void) } } - #ifdef USE_MINI_HT - memset(mini_ht, -1, sizeof(mini_ht)); - #endif do_clear_cache(); + mini_ht_clear(); } // Add an entry to jump_out after making a link @@ -2114,28 +2141,18 @@ static void multdiv_alloc(struct regstat *current,int i) clear_const(current,dops[i].rs2); alloc_cc(current,i); // for stalls dirty_reg(current,CCREG); - if(dops[i].rs1&&dops[i].rs2) - { - current->u&=~(1LL<u&=~(1LL<u &= ~(1ull << HIREG); + current->u &= ~(1ull << LOREG); + alloc_reg(current, i, HIREG); + alloc_reg(current, i, LOREG); + dirty_reg(current, HIREG); + dirty_reg(current, LOREG); + if ((dops[i].opcode2 & 0x3e) == 0x1a || (dops[i].rs1 && dops[i].rs2)) // div(u) { - // Multiply by zero is zero. - // MIPS does not have a divide by zero exception. - alloc_reg(current,i,HIREG); - alloc_reg(current,i,LOREG); - dirty_reg(current,HIREG); - dirty_reg(current,LOREG); - if (dops[i].rs1 && ((dops[i].opcode2 & 0x3e) == 0x1a)) // div(u) 0 - alloc_reg(current, i, dops[i].rs1); + alloc_reg(current, i, dops[i].rs1); + alloc_reg(current, i, dops[i].rs2); } + // else multiply by zero is zero } #endif @@ -4955,6 +4972,10 @@ static void drc_dbg_emit_do_cmp(int i, int ccadj_) emit_storereg(reg, 0); } } + if (dops[i].opcode == 0x0f) { // LUI + emit_movimm(cinfo[i].imm << 16, 0); + emit_storereg(dops[i].rt1, 0); + } emit_movimm(start+i*4,0); emit_writeword(0,&pcaddr); int cc = get_reg(regs[i].regmap_entry, CCREG); @@ -4970,8 +4991,18 @@ static void drc_dbg_emit_do_cmp(int i, int ccadj_) restore_regs(reglist); assem_debug("\\\\do_insn_cmp\n"); } +static void drc_dbg_emit_wb_dirtys(int i, const struct regstat *i_regs) +{ + // write-out non-consts, consts are likely different because of get_final_value() + if (i_regs->dirty & ~i_regs->loadedconst) { + assem_debug("/ drc_dbg_wb\n"); + wb_dirtys(i_regs->regmap, i_regs->dirty & ~i_regs->loadedconst); + assem_debug("\\ drc_dbg_wb\n"); + } +} #else #define drc_dbg_emit_do_cmp(x,y) +#define drc_dbg_emit_wb_dirtys(x,y) #endif // Used when a branch jumps into the delay slot of another branch @@ -5695,6 +5726,7 @@ static void cjump_assemble(int i, const struct regstat *i_regs) load_reg(regs[i].regmap,branch_regs[i].regmap,ROREG); load_regs(regs[i].regmap,branch_regs[i].regmap,CCREG,INVCP); ds_assemble(i+1,&branch_regs[i]); + drc_dbg_emit_wb_dirtys(i+1, &branch_regs[i]); cc=get_reg(branch_regs[i].regmap,CCREG); if(cc==-1) { emit_loadreg(CCREG,cc=HOST_CCREG); @@ -6186,6 +6218,12 @@ static noinline void new_dynarec_test(void) out = ndrc->translation_cache; } +static int get_cycle_multiplier(void) +{ + return Config.cycle_multiplier_override && Config.cycle_multiplier == CYCLE_MULT_DEFAULT + ? Config.cycle_multiplier_override : Config.cycle_multiplier; +} + // clear the state completely, instead of just marking // things invalid like invalidate_all_pages() does void new_dynarec_clear_full(void) @@ -6193,9 +6231,9 @@ void new_dynarec_clear_full(void) int n; out = ndrc->translation_cache; memset(invalid_code,1,sizeof(invalid_code)); - memset(hash_table,0xff,sizeof(hash_table)); - memset(mini_ht,-1,sizeof(mini_ht)); memset(shadow,0,sizeof(shadow)); + hash_table_clear(); + mini_ht_clear(); copy=shadow; expirep = EXPIRITY_OFFSET; pending_exception=0; @@ -6213,13 +6251,30 @@ void new_dynarec_clear_full(void) stat_clear(stat_blocks); stat_clear(stat_links); + if (cycle_multiplier_old != Config.cycle_multiplier + || new_dynarec_hacks_old != new_dynarec_hacks) + { + SysPrintf("ndrc config: mul=%d, ha=%x, pex=%d\n", + get_cycle_multiplier(), new_dynarec_hacks, Config.PreciseExceptions); + } cycle_multiplier_old = Config.cycle_multiplier; new_dynarec_hacks_old = new_dynarec_hacks; } +static int pgsize(void) +{ +#ifdef _SC_PAGESIZE + return sysconf(_SC_PAGESIZE); +#else + return 4096; +#endif +} + void new_dynarec_init(void) { - SysPrintf("Init new dynarec, ndrc size %x\n", (int)sizeof(*ndrc)); + int align = pgsize() - 1; + SysPrintf("Init new dynarec, ndrc size %x, pgsize %d\n", + (int)sizeof(*ndrc), align + 1); #ifdef _3DS check_rosalina(); @@ -6275,11 +6330,12 @@ void new_dynarec_init(void) #endif #else #ifndef NO_WRITE_EXEC + ndrc = (struct ndrc_mem *)((size_t)(ndrc_bss + align) & ~align); // not all systems allow execute in data segment by default // size must be 4K aligned for 3DS? if (mprotect(ndrc, sizeof(*ndrc), PROT_READ | PROT_WRITE | PROT_EXEC) != 0) - SysPrintf("mprotect() failed: %s\n", strerror(errno)); + SysPrintf("mprotect(%p) failed: %s\n", ndrc, strerror(errno)); #endif #endif out = ndrc->translation_cache; @@ -6335,9 +6391,9 @@ static u_int *get_source_start(u_int addr, u_int *limit) *limit = (addr & 0xa0600000) + 0x00200000; return (u_int *)(psxM + (addr & 0x1fffff)); } - else if (!Config.HLE && ( + else if ( /* (0x9fc00000 <= addr && addr < 0x9fc80000) ||*/ - (0xbfc00000 <= addr && addr < 0xbfc80000))) + (0xbfc00000 <= addr && addr < 0xbfc80000)) { // BIOS. The multiplier should be much higher as it's uncached 8bit mem, // but timings in PCSX are too tied to the interpreter's 2-per-insn assumption @@ -6483,6 +6539,15 @@ void new_dynarec_print_stats(void) #endif } +static void force_intcall(int i) +{ + memset(&dops[i], 0, sizeof(dops[i])); + dops[i].itype = INTCALL; + dops[i].rs1 = CCREG; + dops[i].is_exception = 1; + cinfo[i].ba = -1; +} + static int apply_hacks(void) { int i; @@ -6517,22 +6582,29 @@ static int apply_hacks(void) return 1; } } + if (Config.HLE) + { + if (start <= psxRegs.biosBranchCheck && psxRegs.biosBranchCheck < start + i*4) + { + i = (psxRegs.biosBranchCheck - start) / 4u + 23; + if (dops[i].is_jump && !dops[i+1].bt) + { + force_intcall(i); + dops[i+1].is_ds = 0; + } + } + } return 0; } -static int is_ld_use_hazard(int ld_rt, const struct decoded_insn *op) +static int is_ld_use_hazard(const struct decoded_insn *op_ld, + const struct decoded_insn *op) { - return ld_rt != 0 && (ld_rt == op->rs1 || ld_rt == op->rs2) - && op->itype != LOADLR && op->itype != CJUMP && op->itype != SJUMP; -} - -static void force_intcall(int i) -{ - memset(&dops[i], 0, sizeof(dops[i])); - dops[i].itype = INTCALL; - dops[i].rs1 = CCREG; - dops[i].is_exception = 1; - cinfo[i].ba = -1; + if (op_ld->rt1 == 0 || (op_ld->rt1 != op->rs1 && op_ld->rt1 != op->rs2)) + return 0; + if (op_ld->itype == LOADLR && op->itype == LOADLR) + return op_ld->rt1 == op_ld->rs1; + return op->itype != CJUMP && op->itype != SJUMP; } static void disassemble_one(int i, u_int src) @@ -6915,7 +6987,7 @@ static noinline void pass1_disassemble(u_int pagelimit) else dop = &dops[t]; } - if ((dop && is_ld_use_hazard(dops[i].rt1, dop)) + if ((dop && is_ld_use_hazard(&dops[i], dop)) || (!dop && Config.PreciseExceptions)) { // jump target wants DS result - potential load delay effect SysPrintf("load delay in DS @%08x (%08x)\n", start + i*4, start); @@ -6932,7 +7004,7 @@ static noinline void pass1_disassemble(u_int pagelimit) } } else if (i > 0 && dops[i-1].is_delay_load - && is_ld_use_hazard(dops[i-1].rt1, &dops[i]) + && is_ld_use_hazard(&dops[i-1], &dops[i]) && (i < 2 || !dops[i-2].is_ujump)) { SysPrintf("load delay @%08x (%08x)\n", start + i*4, start); for (j = i - 1; j > 0 && dops[j-1].is_delay_load; j--) @@ -6989,9 +7061,9 @@ static noinline void pass1_disassemble(u_int pagelimit) done = 1; } if (dops[i].itype == HLECALL) - stop = 1; + done = 1; else if (dops[i].itype == INTCALL) - stop = 2; + done = 2; else if (dops[i].is_exception) done = stop_after_jal ? 1 : 2; if (done == 2) { @@ -8396,6 +8468,7 @@ static noinline void pass5b_preallocate2(void) regs[i+2].wasdirty&=~(1<=0); + #if 0 // what is this for? double allocs $0 in ps1_rom.bin if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0) { regs[i].regmap[hr]=dops[i+1].rs1; @@ -8407,6 +8480,7 @@ static noinline void pass5b_preallocate2(void) regs[i+1].wasdirty&=~(1<