From: notaz Date: Sat, 20 Nov 2021 22:06:52 +0000 (+0200) Subject: drc,interpreter: add mul/div stalls X-Git-Tag: r23~68 X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=32631e6a5d44d1e6aa5d53d5777a039b2d3d4300;p=pcsx_rearmed.git drc,interpreter: add mul/div stalls for games like Zero Divide, which will run at twice speed if there is not enough combined slowdown (probably from muldiv/gte/cache misses). --- diff --git a/frontend/libretro.c b/frontend/libretro.c index 940ff05a..4c285cfb 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -1063,6 +1063,7 @@ static void update_variables(bool in_flight) } } #endif + psxCpu->ApplyConfig(); var.value = "NULL"; var.key = "pcsx_rearmed_spu_reverb"; diff --git a/frontend/main.c b/frontend/main.c index 3ec252f0..4631618e 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -634,7 +634,7 @@ int main(int argc, char *argv[]) } if (ready_to_go) { - menu_prepare_emu(0); + menu_prepare_emu(); // If a state has been specified, then load that if (loadst) { diff --git a/frontend/menu.c b/frontend/menu.c index 1d21dacf..76d0e868 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -398,6 +398,7 @@ static const struct { CE_CONFIG_VAL(RCntFix), CE_CONFIG_VAL(VSyncWA), CE_CONFIG_VAL(icache_emulation), + CE_CONFIG_VAL(DisableStalls), CE_CONFIG_VAL(Cpu), CE_INTVAL(region), CE_INTVAL_V(g_scaler, 3), @@ -1556,7 +1557,7 @@ static const char h_cfg_psxclk[] = "Over/under-clock the PSX, default is " DEFA static const char h_cfg_nosmc[] = "Will cause crashes when loading, break memcards"; static const char h_cfg_gteunn[] = "May cause graphical glitches"; static const char h_cfg_gteflgs[] = "Will cause graphical glitches"; -static const char h_cfg_gtestll[] = "Some games will run too fast"; +static const char h_cfg_stalls[] = "Will cause some games to run too fast"; static menu_entry e_menu_speed_hacks[] = { @@ -1564,7 +1565,7 @@ static menu_entry e_menu_speed_hacks[] = mee_onoff_h ("Disable SMC checks", 0, 
new_dynarec_hacks, NDHACK_NO_SMC_CHECK, h_cfg_nosmc), mee_onoff_h ("Assume GTE regs unneeded", 0, new_dynarec_hacks, NDHACK_GTE_UNNEEDED, h_cfg_gteunn), mee_onoff_h ("Disable GTE flags", 0, new_dynarec_hacks, NDHACK_GTE_NO_FLAGS, h_cfg_gteflgs), - mee_onoff_h ("Disable GTE stalls", 0, new_dynarec_hacks, NDHACK_GTE_NO_STALL, h_cfg_gtestll), + mee_onoff_h ("Disable CPU/GTE stalls", 0, Config.DisableStalls, 1, h_cfg_stalls), mee_end, }; @@ -2331,11 +2332,8 @@ static void menu_leave_emu(void); void menu_loop(void) { - int cycle_multiplier_old = cycle_multiplier; - int ndrc_hacks_old = new_dynarec_hacks; static int warned_about_bios = 0; static int sel = 0; - int ndrc_changed; menu_leave_emu(); @@ -2370,9 +2368,7 @@ void menu_loop(void) in_set_config_int(0, IN_CFG_BLOCKING, 0); - ndrc_changed = cycle_multiplier_old != cycle_multiplier - || ndrc_hacks_old != new_dynarec_hacks; - menu_prepare_emu(ndrc_changed); + menu_prepare_emu(); } static int qsort_strcmp(const void *p1, const void *p2) @@ -2624,7 +2620,7 @@ static void menu_leave_emu(void) cpu_clock = plat_target_cpu_clock_get(); } -void menu_prepare_emu(int ndrc_config_changed) +void menu_prepare_emu(void) { R3000Acpu *prev_cpu = psxCpu; @@ -2641,8 +2637,8 @@ void menu_prepare_emu(int ndrc_config_changed) // note that this does not really reset, just clears drc caches psxCpu->Reset(); } - else if (ndrc_config_changed) - new_dynarec_clear_full(); + + psxCpu->ApplyConfig(); // core doesn't care about Config.Cdda changes, // so handle them manually here diff --git a/frontend/menu.h b/frontend/menu.h index 9defc1ea..81cd1baf 100644 --- a/frontend/menu.h +++ b/frontend/menu.h @@ -1,5 +1,5 @@ void menu_init(void); -void menu_prepare_emu(int ndrc_config_changed); +void menu_prepare_emu(void); void menu_loop(void); void menu_finish(void); diff --git a/libpcsxcore/gte.c b/libpcsxcore/gte.c index d3428225..6b3b299f 100644 --- a/libpcsxcore/gte.c +++ b/libpcsxcore/gte.c @@ -275,7 +275,7 @@ INLINE u32 DIVIDE(u16 n, u16 d) { 
#ifndef FLAGLESS -const char gte_cycletab[64] = { +const unsigned char gte_cycletab[64] = { /* 1 2 3 4 5 6 7 8 9 a b c d e f */ 0, 15, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 6, 0, 0, 0, 8, 8, 8, 19, 13, 0, 44, 0, 0, 0, 0, 17, 11, 0, 14, 0, @@ -429,10 +429,19 @@ void gteLWC2() { } void gteSWC2() { - gteCheckStall(0); psxMemWrite32(_oB_, MFC2(_Rt_)); } +void gteLWC2_stall() { + gteCheckStall(0); + gteLWC2(); +} + +void gteSWC2_stall() { + gteCheckStall(0); + gteSWC2(); +} + #endif // FLAGLESS #if 0 diff --git a/libpcsxcore/gte.h b/libpcsxcore/gte.h index 9ad73d57..75e9e5b3 100644 --- a/libpcsxcore/gte.h +++ b/libpcsxcore/gte.h @@ -67,7 +67,7 @@ extern "C" { struct psxCP2Regs; -extern const char gte_cycletab[64]; +extern const unsigned char gte_cycletab[64]; int gteCheckStallRaw(u32 op_cycles, psxRegisters *regs); void gteCheckStall(u32 op); @@ -78,6 +78,8 @@ void gteMTC2(); void gteCTC2(); void gteLWC2(); void gteSWC2(); +void gteLWC2_stall(); +void gteSWC2_stall(); void gteRTPS(struct psxCP2Regs *regs); void gteOP(struct psxCP2Regs *regs); diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 186d0af4..d68aea6c 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -1986,7 +1986,7 @@ static void do_dirty_stub_ds() static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist) { save_regs_all(reglist); - cop2_call_stall_check(op, i, i_regs, 0); + cop2_do_stall_check(op, i, i_regs, 0); #ifdef PCNT emit_movimm(op, 0); emit_far_call(pcnt_gte_start); diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index 17517eff..070c80fc 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -1788,7 +1788,7 @@ static void get_bounds(void *addr, u_char **start, u_char **end) static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist) { save_load_regs_all(1, reglist); - 
cop2_call_stall_check(op, i, i_regs, 0); + cop2_do_stall_check(op, i, i_regs, 0); #ifdef PCNT emit_movimm(op, 0); emit_far_call(pcnt_gte_start); diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index 0d6e58d3..f170be7e 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -405,6 +405,20 @@ static void ari64_notify(int note, void *data) { } #endif +static void ari64_apply_config() +{ + if (Config.DisableStalls) + new_dynarec_hacks |= NDHACK_NO_STALLS; + else + new_dynarec_hacks &= ~NDHACK_NO_STALLS; + + if (cycle_multiplier != cycle_multiplier_old + || new_dynarec_hacks != new_dynarec_hacks_old) + { + new_dynarec_clear_full(); + } +} + static void ari64_shutdown() { new_dynarec_cleanup(); @@ -420,6 +434,7 @@ R3000Acpu psxRec = { #ifdef ICACHE_EMULATION ari64_notify, #endif + ari64_apply_config, ari64_shutdown }; @@ -431,7 +446,9 @@ unsigned int next_interupt; int new_dynarec_did_compile; int cycle_multiplier; int cycle_multiplier_override; +int cycle_multiplier_old; int new_dynarec_hacks_pergame; +int new_dynarec_hacks_old; int new_dynarec_hacks; void *psxH_ptr; void *zeromem_ptr; diff --git a/libpcsxcore/new_dynarec/linkage_offsets.h b/libpcsxcore/new_dynarec/linkage_offsets.h index 4c75e6c0..916bb1a8 100644 --- a/libpcsxcore/new_dynarec/linkage_offsets.h +++ b/libpcsxcore/new_dynarec/linkage_offsets.h @@ -23,8 +23,9 @@ #define LO_interrupt (LO_cycle + 4) #define LO_intCycle (LO_interrupt + 4) #define LO_gteBusyCycle (LO_intCycle + 256) -#define LO_psxRegs_reserved (LO_gteBusyCycle + 4) -#define LO_psxRegs_end (LO_psxRegs_reserved + 4*3) +#define LO_muldivBusyCycle (LO_gteBusyCycle + 4) +#define LO_psxRegs_reserved (LO_muldivBusyCycle + 4) +#define LO_psxRegs_end (LO_psxRegs_reserved + 4*2) #define LO_rcnts (LO_psxRegs_end) #define LO_rcnts_end (LO_rcnts + 7*4*4) #define LO_inv_code_start (LO_rcnts_end) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 
e0cff62c..f45322a8 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -47,11 +47,19 @@ static int sceBlock; #ifndef min #define min(a, b) ((b) < (a) ? (b) : (a)) #endif +#ifndef max +#define max(a, b) ((b) > (a) ? (b) : (a)) +#endif //#define DISASM -//#define assem_debug printf -//#define inv_debug printf +//#define ASSEM_PRINT + +#ifdef ASSEM_PRINT +#define assem_debug printf +#else #define assem_debug(...) +#endif +//#define inv_debug printf #define inv_debug(...) #ifdef __i386__ @@ -222,6 +230,7 @@ struct link_entry int new_dynarec_hacks; int new_dynarec_hacks_pergame; + int new_dynarec_hacks_old; int new_dynarec_did_compile; #define HACK_ENABLED(x) ((new_dynarec_hacks | new_dynarec_hacks_pergame) & (x)) @@ -336,7 +345,7 @@ static void add_to_linker(void *addr, u_int target, int ext); static void *emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override); static void *get_direct_memhandler(void *table, u_int addr, enum stub_type type, uintptr_t *addr_host); -static void cop2_call_stall_check(u_int op, int i, const struct regstat *i_regs, u_int reglist); +static void cop2_do_stall_check(u_int op, int i, const struct regstat *i_regs, u_int reglist); static void pass_args(int a0, int a1); static void emit_far_jump(const void *f); static void emit_far_call(const void *f); @@ -454,6 +463,7 @@ static void do_clear_cache(void) int cycle_multiplier; // 100 for 1.0 int cycle_multiplier_override; +int cycle_multiplier_old; static int CLOCK_ADJUST(int x) { @@ -905,7 +915,7 @@ static void host_tempreg_acquire(void) {} static void host_tempreg_release(void) {} #endif -#ifdef DRC_DBG +#ifdef ASSEM_PRINT extern void gen_interupt(); extern void do_insn_cmp(); #define FUNCNAME(f) { f, " " #f } @@ -929,7 +939,9 @@ static const struct { FUNCNAME(new_dyna_leave), FUNCNAME(pcsx_mtc0), FUNCNAME(pcsx_mtc0_ds), +#ifdef DRC_DBG FUNCNAME(do_insn_cmp), +#endif #ifdef __arm__ FUNCNAME(verify_code), #endif @@ -1600,6 +1612,12 @@ static 
void alloc_reg_temp(struct regstat *cur,int i,signed char reg) static void mov_alloc(struct regstat *current,int i) { + if (rs1[i] == HIREG || rs1[i] == LOREG) { + // logically this is needed but just won't work, no idea why + //alloc_cc(current,i); // for stalls + //dirty_reg(current,CCREG); + } + // Note: Don't need to actually alloc the source registers //alloc_reg(current,i,rs1[i]); alloc_reg(current,i,rt1[i]); @@ -1863,6 +1881,7 @@ void multdiv_alloc(struct regstat *current,int i) // case 0x1F: DDIVU clear_const(current,rs1[i]); clear_const(current,rs2[i]); + alloc_cc(current,i); // for stalls if(rs1[i]&&rs2[i]) { if((opcode2[i]&4)==0) // 32-bit @@ -3314,7 +3333,6 @@ static void log_gte_stall(int stall, u_int cycle) { if ((u_int)stall <= 44) printf("x stall %2d %u\n", stall, cycle + last_count); - if (cycle + last_count > 1215348544) exit(1); } static void emit_log_gte_stall(int i, int stall, u_int reglist) @@ -3330,14 +3348,13 @@ static void emit_log_gte_stall(int i, int stall, u_int reglist) } #endif -static void cop2_call_stall_check(u_int op, int i, const struct regstat *i_regs, u_int reglist) +static void cop2_do_stall_check(u_int op, int i, const struct regstat *i_regs, u_int reglist) { int j = i, other_gte_op_cycles = -1, stall = -MAXBLOCK, cycles_passed; int rtmp = reglist_find_free(reglist); - if (HACK_ENABLED(NDHACK_GTE_NO_STALL)) + if (HACK_ENABLED(NDHACK_NO_STALLS)) return; - //assert(get_reg(i_regs->regmap, CCREG) == HOST_CCREG); if (get_reg(i_regs->regmap, CCREG) != HOST_CCREG) { // happens occasionally... cc evicted? 
Don't bother then //printf("no cc %08x\n", start + i*4); @@ -3349,6 +3366,7 @@ static void cop2_call_stall_check(u_int op, int i, const struct regstat *i_regs, if (cop2_is_stalling_op(j, &other_gte_op_cycles) || bt[j]) break; } + j = max(j, 0); } cycles_passed = CLOCK_ADJUST(ccadj[i] - ccadj[j]); if (other_gte_op_cycles >= 0) @@ -3357,7 +3375,7 @@ static void cop2_call_stall_check(u_int op, int i, const struct regstat *i_regs, stall = 0; // can't stall if (stall == -MAXBLOCK && rtmp >= 0) { // unknown stall, do the expensive runtime check - assem_debug("; cop2_call_stall_check\n"); + assem_debug("; cop2_do_stall_check\n"); #if 0 // too slow save_regs(reglist); emit_movimm(gte_cycletab[op], 0); @@ -3415,6 +3433,98 @@ static void cop2_call_stall_check(u_int op, int i, const struct regstat *i_regs, host_tempreg_release(); } +static int is_mflohi(int i) +{ + return (itype[i] == MOV && (rs1[i] == HIREG || rs1[i] == LOREG)); +} + +static int check_multdiv(int i, int *cycles) +{ + if (itype[i] != MULTDIV) + return 0; + if (opcode2[i] == 0x18 || opcode2[i] == 0x19) // MULT(U) + *cycles = 11; // approx from 7 11 14 + else + *cycles = 37; + return 1; +} + +static void multdiv_prepare_stall(int i, const struct regstat *i_regs) +{ + int j, found = 0, c = 0; + if (HACK_ENABLED(NDHACK_NO_STALLS)) + return; + if (get_reg(i_regs->regmap, CCREG) != HOST_CCREG) { + // happens occasionally... cc evicted? 
Don't bother then + return; + } + for (j = i + 1; j < slen; j++) { + if (bt[j]) + break; + if ((found = is_mflohi(j))) + break; + if (is_jump(j)) { + // check ds + if (j + 1 < slen && (found = is_mflohi(j + 1))) + j++; + break; + } + } + if (found) + // handle all in multdiv_do_stall() + return; + check_multdiv(i, &c); + assert(c > 0); + assem_debug("; muldiv prepare stall %d\n", c); + host_tempreg_acquire(); + emit_addimm(HOST_CCREG, CLOCK_ADJUST(ccadj[i]) + c, HOST_TEMPREG); + emit_writeword(HOST_TEMPREG, &psxRegs.muldivBusyCycle); + host_tempreg_release(); +} + +static void multdiv_do_stall(int i, const struct regstat *i_regs) +{ + int j, known_cycles = 0; + u_int reglist = get_host_reglist(i_regs->regmap); + int rtmp = get_reg(i_regs->regmap, -1); + if (rtmp < 0) + rtmp = reglist_find_free(reglist); + if (HACK_ENABLED(NDHACK_NO_STALLS)) + return; + if (get_reg(i_regs->regmap, CCREG) != HOST_CCREG || rtmp < 0) { + // happens occasionally... cc evicted? Don't bother then + //printf("no cc/rtmp %08x\n", start + i*4); + return; + } + if (!bt[i]) { + for (j = i - 1; j >= 0; j--) { + if (is_ds[j]) break; + if (check_multdiv(j, &known_cycles) || bt[j]) + break; + if (is_mflohi(j)) + // already handled by this op + return; + } + j = max(j, 0); + } + if (known_cycles > 0) { + known_cycles -= CLOCK_ADJUST(ccadj[i] - ccadj[j]); + assem_debug("; muldiv stall resolved %d\n", known_cycles); + if (known_cycles > 0) + emit_addimm(HOST_CCREG, known_cycles, HOST_CCREG); + return; + } + assem_debug("; muldiv stall unresolved\n"); + host_tempreg_acquire(); + emit_readword(&psxRegs.muldivBusyCycle, rtmp); + emit_addimm(rtmp, -CLOCK_ADJUST(ccadj[i]), rtmp); + emit_sub(rtmp, HOST_CCREG, HOST_TEMPREG); + emit_cmpimm(HOST_TEMPREG, 37); + emit_cmovb_reg(rtmp, HOST_CCREG); + //emit_log_gte_stall(i, 0, reglist); + host_tempreg_release(); +} + static void cop2_get_dreg(u_int copr,signed char tl,signed char temp) { switch (copr) { @@ -3532,8 +3642,9 @@ static void c2ls_assemble(int i, const 
struct regstat *i_regs) if (!offset&&!c&&s>=0) ar=s; assert(ar>=0); + cop2_do_stall_check(0, i, i_regs, reglist); + if (opcode[i]==0x3a) { // SWC2 - cop2_call_stall_check(0, i, i_regs, reglist_exclude(reglist, tl, -1)); cop2_get_dreg(copr,tl,-1); type=STOREW_STUB; } @@ -3600,12 +3711,13 @@ static void cop2_assemble(int i, const struct regstat *i_regs) u_int copr = (source[i]>>11) & 0x1f; signed char temp = get_reg(i_regs->regmap, -1); - if (opcode2[i] == 0 || opcode2[i] == 2) { // MFC2/CFC2 - if (!HACK_ENABLED(NDHACK_GTE_NO_STALL)) { + if (!HACK_ENABLED(NDHACK_NO_STALLS)) { + u_int reglist = reglist_exclude(get_host_reglist(i_regs->regmap), temp, -1); + if (opcode2[i] == 0 || opcode2[i] == 2) { // MFC2/CFC2 signed char tl = get_reg(i_regs->regmap, rt1[i]); - u_int reglist = reglist_exclude(get_host_reglist(i_regs->regmap), tl, temp); - cop2_call_stall_check(0, i, i_regs, reglist); + reglist = reglist_exclude(reglist, tl, -1); } + cop2_do_stall_check(0, i, i_regs, reglist); } if (opcode2[i]==0) { // MFC2 signed char tl=get_reg(i_regs->regmap,rt1[i]); @@ -3753,6 +3865,8 @@ static void mov_assemble(int i,struct regstat *i_regs) else emit_loadreg(rs1[i],tl); } } + if (rs1[i] == HIREG || rs1[i] == LOREG) // MFHI/MFLO + multdiv_do_stall(i, i_regs); } // call interpreter, exception handler, things that change pc/regs/cycles ... 
@@ -3921,7 +4035,9 @@ static void ds_assemble(int i,struct regstat *i_regs) case C2OP: c2op_assemble(i,i_regs);break; case MULTDIV: - multdiv_assemble(i,i_regs);break; + multdiv_assemble(i,i_regs); + multdiv_prepare_stall(i,i_regs); + break; case MOV: mov_assemble(i,i_regs);break; case SYSCALL: @@ -4577,7 +4693,9 @@ static void ds_assemble_entry(int i) case C2OP: c2op_assemble(t,&regs[t]);break; case MULTDIV: - multdiv_assemble(t,&regs[t]);break; + multdiv_assemble(t,&regs[t]); + multdiv_prepare_stall(i,&regs[t]); + break; case MOV: mov_assemble(t,&regs[t]);break; case SYSCALL: @@ -5921,7 +6039,9 @@ static void pagespan_ds() case C2OP: c2op_assemble(0,&regs[0]);break; case MULTDIV: - multdiv_assemble(0,&regs[0]);break; + multdiv_assemble(0,&regs[0]); + multdiv_prepare_stall(0,&regs[0]); + break; case MOV: mov_assemble(0,&regs[0]);break; case SYSCALL: @@ -6731,6 +6851,9 @@ void new_dynarec_clear_full(void) for(n=0;n<4096;n++) ll_clear(jump_in+n); for(n=0;n<4096;n++) ll_clear(jump_out+n); for(n=0;n<4096;n++) ll_clear(jump_dirty+n); + + cycle_multiplier_old = cycle_multiplier; + new_dynarec_hacks_old = new_dynarec_hacks; } void new_dynarec_init(void) @@ -8105,7 +8228,7 @@ int new_recompile_block(u_int addr) // this should really be removed since the real stalls have been implemented, // but doing so causes sizeable perf regression against the older version u_int gtec = gte_cycletab[source[i] & 0x3f]; - cc += HACK_ENABLED(NDHACK_GTE_NO_STALL) ? gtec/2 : 2; + cc += HACK_ENABLED(NDHACK_NO_STALLS) ? gtec/2 : 2; } else if(i>1&&itype[i]==STORE&&itype[i-1]==STORE&&itype[i-2]==STORE&&!bt[i]) { @@ -8114,7 +8237,7 @@ int new_recompile_block(u_int addr) else if(itype[i]==C2LS) { // same as with C2OP - cc += HACK_ENABLED(NDHACK_GTE_NO_STALL) ? 4 : 2; + cc += HACK_ENABLED(NDHACK_NO_STALLS) ? 
4 : 2; } #endif else @@ -9094,7 +9217,9 @@ int new_recompile_block(u_int addr) case C2OP: c2op_assemble(i,&regs[i]);break; case MULTDIV: - multdiv_assemble(i,&regs[i]);break; + multdiv_assemble(i,&regs[i]); + multdiv_prepare_stall(i,&regs[i]); + break; case MOV: mov_assemble(i,&regs[i]);break; case SYSCALL: diff --git a/libpcsxcore/new_dynarec/new_dynarec.h b/libpcsxcore/new_dynarec/new_dynarec.h index bff1c164..b9a3c67c 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.h +++ b/libpcsxcore/new_dynarec/new_dynarec.h @@ -6,14 +6,16 @@ extern int stop; extern int new_dynarec_did_compile; extern int cycle_multiplier; // 100 for 1.0 extern int cycle_multiplier_override; +extern int cycle_multiplier_old; #define NDHACK_NO_SMC_CHECK (1<<0) #define NDHACK_GTE_UNNEEDED (1<<1) #define NDHACK_GTE_NO_FLAGS (1<<2) #define NDHACK_OVERRIDE_CYCLE_M (1<<3) -#define NDHACK_GTE_NO_STALL (1<<4) +#define NDHACK_NO_STALLS (1<<4) extern int new_dynarec_hacks; extern int new_dynarec_hacks_pergame; +extern int new_dynarec_hacks_old; void new_dynarec_init(void); void new_dynarec_cleanup(void); diff --git a/libpcsxcore/psxcommon.h b/libpcsxcore/psxcommon.h index c9d300aa..2dd91cf1 100644 --- a/libpcsxcore/psxcommon.h +++ b/libpcsxcore/psxcommon.h @@ -133,6 +133,7 @@ typedef struct { boolean UseNet; boolean VSyncWA; boolean icache_emulation; + boolean DisableStalls; u8 Cpu; // CPU_DYNAREC or CPU_INTERPRETER u8 PsxType; // PSX_TYPE_NTSC or PSX_TYPE_PAL #ifdef _WIN32 diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c index b171b0a6..2dd90b0f 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -27,6 +27,7 @@ #include "psxhle.h" #include "debug.h" #include "psxinterpreter.h" +#include <assert.h> static int branch = 0; static int branch2 = 0; @@ -610,6 +611,11 @@ void psxDIV() { } } +void psxDIV_stall() { + psxRegs.muldivBusyCycle = psxRegs.cycle + 37; + psxDIV(); +} + void psxDIVU() { if (_rRt_ != 0) { _rLo_ = _rRs_ / _rRt_; @@ -621,6 +627,11 @@ void psxDIVU() { } } +void 
psxDIVU_stall() { + psxRegs.muldivBusyCycle = psxRegs.cycle + 37; + psxDIVU(); +} + void psxMULT() { u64 res = (s64)((s64)_i32(_rRs_) * (s64)_i32(_rRt_)); @@ -628,6 +639,15 @@ void psxMULT() { psxRegs.GPR.n.hi = (u32)((res >> 32) & 0xffffffff); } +void psxMULT_stall() { + // approximate, but maybe good enough + u32 rs = _rRs_; + u32 lz = __builtin_clz(((rs ^ ((s32)rs >> 21)) | 1)); + u32 c = 7 + (2 - (lz / 11)) * 4; + psxRegs.muldivBusyCycle = psxRegs.cycle + c; + psxMULT(); +} + void psxMULTU() { u64 res = (u64)((u64)_u32(_rRs_) * (u64)_u32(_rRt_)); @@ -635,6 +655,14 @@ void psxMULTU() { psxRegs.GPR.n.hi = (u32)((res >> 32) & 0xffffffff); } +void psxMULTU_stall() { + // approximate, but maybe good enough + u32 lz = __builtin_clz(_rRs_ | 1); + u32 c = 7 + (2 - (lz / 11)) * 4; + psxRegs.muldivBusyCycle = psxRegs.cycle + c; + psxMULTU(); +} + /********************************************************* * Register branch logic * * Format: OP rs, offset * @@ -678,6 +706,18 @@ void psxLUI() { if (!_Rt_) return; _u32(_rRt_) = psxRegs.code << 16; } // Upper void psxMFHI() { if (!_Rd_) return; _rRd_ = _rHi_; } // Rd = Hi void psxMFLO() { if (!_Rd_) return; _rRd_ = _rLo_; } // Rd = Lo +static void mflohiCheckStall(void) +{ + u32 left = psxRegs.muldivBusyCycle - psxRegs.cycle; + if (left <= 37) { + //printf("muldiv stall %u\n", left); + psxRegs.cycle = psxRegs.muldivBusyCycle; + } +} + +void psxMFHI_stall() { mflohiCheckStall(); psxMFHI(); } +void psxMFLO_stall() { mflohiCheckStall(); psxMFLO(); } + /********************************************************* * Move to GPR to HI/LO & Register jump * * Format: OP rs * @@ -934,9 +974,12 @@ void psxCOP0() { } void psxCOP2() { + psxCP2[_Funct_]((struct psxCP2Regs *)&psxRegs.CP2D); +} + +void psxCOP2_stall() { u32 f = _Funct_; - if (f != 0 || _Rs_ < 4) // not MTC2/CTC2 - gteCheckStall(f); + gteCheckStall(f); psxCP2[f]((struct psxCP2Regs *)&psxRegs.CP2D); } @@ -1073,6 +1116,40 @@ void intNotify (int note, void *data) { #endif } +void 
applyConfig() { + assert(psxBSC[18] == psxCOP2 || psxBSC[18] == psxCOP2_stall); + assert(psxBSC[50] == gteLWC2 || psxBSC[50] == gteLWC2_stall); + assert(psxBSC[58] == gteSWC2 || psxBSC[58] == gteSWC2_stall); + assert(psxSPC[16] == psxMFHI || psxSPC[16] == psxMFHI_stall); + assert(psxSPC[18] == psxMFLO || psxSPC[18] == psxMFLO_stall); + assert(psxSPC[24] == psxMULT || psxSPC[24] == psxMULT_stall); + assert(psxSPC[25] == psxMULTU || psxSPC[25] == psxMULTU_stall); + assert(psxSPC[26] == psxDIV || psxSPC[26] == psxDIV_stall); + assert(psxSPC[27] == psxDIVU || psxSPC[27] == psxDIVU_stall); + + if (Config.DisableStalls) { + psxBSC[18] = psxCOP2; + psxBSC[50] = gteLWC2; + psxBSC[58] = gteSWC2; + psxSPC[16] = psxMFHI; + psxSPC[18] = psxMFLO; + psxSPC[24] = psxMULT; + psxSPC[25] = psxMULTU; + psxSPC[26] = psxDIV; + psxSPC[27] = psxDIVU; + } else { + psxBSC[18] = psxCOP2_stall; + psxBSC[50] = gteLWC2_stall; + psxBSC[58] = gteSWC2_stall; + psxSPC[16] = psxMFHI_stall; + psxSPC[18] = psxMFLO_stall; + psxSPC[24] = psxMULT_stall; + psxSPC[25] = psxMULTU_stall; + psxSPC[26] = psxDIV_stall; + psxSPC[27] = psxDIVU_stall; + } +} + static void intShutdown() { #ifdef ICACHE_EMULATION if (ICache_Addr) @@ -1123,5 +1200,6 @@ R3000Acpu psxInt = { #ifdef ICACHE_EMULATION intNotify, #endif + applyConfig, intShutdown }; diff --git a/libpcsxcore/r3000a.h b/libpcsxcore/r3000a.h index 54359159..94d7d955 100644 --- a/libpcsxcore/r3000a.h +++ b/libpcsxcore/r3000a.h @@ -47,6 +47,7 @@ typedef struct { #ifdef ICACHE_EMULATION void (*Notify)(int note, void *data); #endif + void (*ApplyConfig)(); void (*Shutdown)(); } R3000Acpu; @@ -194,9 +195,10 @@ typedef struct { u32 interrupt; struct { u32 sCycle, cycle; } intCycle[32]; u32 gteBusyCycle; + u32 muldivBusyCycle; // warning: changing anything in psxRegisters requires update of all // asm in libpcsxcore/new_dynarec/, but this member can be replaced - u32 reserved[3]; + u32 reserved[2]; } psxRegisters; extern psxRegisters psxRegs;