CE_CONFIG_VAL(RCntFix),
CE_CONFIG_VAL(VSyncWA),
CE_CONFIG_VAL(icache_emulation),
+ CE_CONFIG_VAL(DisableStalls),
CE_CONFIG_VAL(Cpu),
CE_INTVAL(region),
CE_INTVAL_V(g_scaler, 3),
static const char h_cfg_nosmc[] = "Will cause crashes when loading, break memcards";
static const char h_cfg_gteunn[] = "May cause graphical glitches";
static const char h_cfg_gteflgs[] = "Will cause graphical glitches";
-static const char h_cfg_gtestll[] = "Some games will run too fast";
+static const char h_cfg_stalls[] = "Will cause some games to run too fast";
static menu_entry e_menu_speed_hacks[] =
{
mee_onoff_h ("Disable SMC checks", 0, new_dynarec_hacks, NDHACK_NO_SMC_CHECK, h_cfg_nosmc),
mee_onoff_h ("Assume GTE regs unneeded", 0, new_dynarec_hacks, NDHACK_GTE_UNNEEDED, h_cfg_gteunn),
mee_onoff_h ("Disable GTE flags", 0, new_dynarec_hacks, NDHACK_GTE_NO_FLAGS, h_cfg_gteflgs),
- mee_onoff_h ("Disable GTE stalls", 0, new_dynarec_hacks, NDHACK_GTE_NO_STALL, h_cfg_gtestll),
+ mee_onoff_h ("Disable CPU/GTE stalls", 0, Config.DisableStalls, 1, h_cfg_stalls),
mee_end,
};
void menu_loop(void)
{
- int cycle_multiplier_old = cycle_multiplier;
- int ndrc_hacks_old = new_dynarec_hacks;
static int warned_about_bios = 0;
static int sel = 0;
- int ndrc_changed;
menu_leave_emu();
in_set_config_int(0, IN_CFG_BLOCKING, 0);
- ndrc_changed = cycle_multiplier_old != cycle_multiplier
- || ndrc_hacks_old != new_dynarec_hacks;
- menu_prepare_emu(ndrc_changed);
+ menu_prepare_emu();
}
static int qsort_strcmp(const void *p1, const void *p2)
cpu_clock = plat_target_cpu_clock_get();
}
-void menu_prepare_emu(int ndrc_config_changed)
+void menu_prepare_emu(void)
{
R3000Acpu *prev_cpu = psxCpu;
// note that this does not really reset, just clears drc caches
psxCpu->Reset();
}
- else if (ndrc_config_changed)
- new_dynarec_clear_full();
+
+ psxCpu->ApplyConfig();
// core doesn't care about Config.Cdda changes,
// so handle them manually here
#ifndef min
#define min(a, b) ((b) < (a) ? (b) : (a))
#endif
+#ifndef max
+#define max(a, b) ((b) > (a) ? (b) : (a))
+#endif
//#define DISASM
-//#define assem_debug printf
-//#define inv_debug printf
+//#define ASSEM_PRINT
+
+#ifdef ASSEM_PRINT
+#define assem_debug printf
+#else
#define assem_debug(...)
+#endif
+//#define inv_debug printf
#define inv_debug(...)
#ifdef __i386__
int new_dynarec_hacks;
int new_dynarec_hacks_pergame;
+ int new_dynarec_hacks_old;
int new_dynarec_did_compile;
#define HACK_ENABLED(x) ((new_dynarec_hacks | new_dynarec_hacks_pergame) & (x))
static void *emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override);
static void *get_direct_memhandler(void *table, u_int addr,
enum stub_type type, uintptr_t *addr_host);
-static void cop2_call_stall_check(u_int op, int i, const struct regstat *i_regs, u_int reglist);
+static void cop2_do_stall_check(u_int op, int i, const struct regstat *i_regs, u_int reglist);
static void pass_args(int a0, int a1);
static void emit_far_jump(const void *f);
static void emit_far_call(const void *f);
int cycle_multiplier; // 100 for 1.0
int cycle_multiplier_override;
+int cycle_multiplier_old;
static int CLOCK_ADJUST(int x)
{
static void host_tempreg_release(void) {}
#endif
-#ifdef DRC_DBG
+#ifdef ASSEM_PRINT
extern void gen_interupt();
extern void do_insn_cmp();
#define FUNCNAME(f) { f, " " #f }
FUNCNAME(new_dyna_leave),
FUNCNAME(pcsx_mtc0),
FUNCNAME(pcsx_mtc0_ds),
+#ifdef DRC_DBG
FUNCNAME(do_insn_cmp),
+#endif
#ifdef __arm__
FUNCNAME(verify_code),
#endif
static void mov_alloc(struct regstat *current,int i)
{
+ if (rs1[i] == HIREG || rs1[i] == LOREG) {
+ // logically this is needed but just won't work, no idea why
+ //alloc_cc(current,i); // for stalls
+ //dirty_reg(current,CCREG);
+ }
+
// Note: Don't need to actually alloc the source registers
//alloc_reg(current,i,rs1[i]);
alloc_reg(current,i,rt1[i]);
// case 0x1F: DDIVU
clear_const(current,rs1[i]);
clear_const(current,rs2[i]);
+ alloc_cc(current,i); // for stalls
if(rs1[i]&&rs2[i])
{
if((opcode2[i]&4)==0) // 32-bit
{
if ((u_int)stall <= 44)
printf("x stall %2d %u\n", stall, cycle + last_count);
- if (cycle + last_count > 1215348544) exit(1);
}
static void emit_log_gte_stall(int i, int stall, u_int reglist)
}
#endif
-static void cop2_call_stall_check(u_int op, int i, const struct regstat *i_regs, u_int reglist)
+static void cop2_do_stall_check(u_int op, int i, const struct regstat *i_regs, u_int reglist)
{
int j = i, other_gte_op_cycles = -1, stall = -MAXBLOCK, cycles_passed;
int rtmp = reglist_find_free(reglist);
- if (HACK_ENABLED(NDHACK_GTE_NO_STALL))
+ if (HACK_ENABLED(NDHACK_NO_STALLS))
return;
- //assert(get_reg(i_regs->regmap, CCREG) == HOST_CCREG);
if (get_reg(i_regs->regmap, CCREG) != HOST_CCREG) {
// happens occasionally... cc evicted? Don't bother then
//printf("no cc %08x\n", start + i*4);
if (cop2_is_stalling_op(j, &other_gte_op_cycles) || bt[j])
break;
}
+ j = max(j, 0);
}
cycles_passed = CLOCK_ADJUST(ccadj[i] - ccadj[j]);
if (other_gte_op_cycles >= 0)
stall = 0; // can't stall
if (stall == -MAXBLOCK && rtmp >= 0) {
// unknown stall, do the expensive runtime check
- assem_debug("; cop2_call_stall_check\n");
+ assem_debug("; cop2_do_stall_check\n");
#if 0 // too slow
save_regs(reglist);
emit_movimm(gte_cycletab[op], 0);
host_tempreg_release();
}
+// Returns 1 when instruction i is a MOV-type op reading HI or LO
+// (i.e. MFHI/MFLO), 0 otherwise.
+static int is_mflohi(int i)
+{
+  if (itype[i] != MOV)
+    return 0;
+  return rs1[i] == HIREG || rs1[i] == LOREG;
+}
+
+// If instruction i has type MULTDIV, stores its estimated latency in
+// *cycles and returns 1. Otherwise returns 0 and leaves *cycles untouched.
+static int check_multdiv(int i, int *cycles)
+{
+  int is_mult;
+
+  if (itype[i] != MULTDIV)
+    return 0;
+
+  is_mult = (opcode2[i] == 0x18 || opcode2[i] == 0x19); // MULT(U)
+  // MULT(U): approx from 7 11 14; every other MULTDIV op gets 37
+  *cycles = is_mult ? 11 : 37;
+  return 1;
+}
+
+// Called after assembling the MULTDIV instruction i. Unless stalls are
+// disabled, emits code storing (cycle counter + ccadj[i] + latency) into
+// psxRegs.muldivBusyCycle. Nothing is emitted when an MFHI/MFLO is found
+// further down the same straight-line run (or in the delay slot of the
+// jump ending it); multdiv_do_stall() deals with that case.
+static void multdiv_prepare_stall(int i, const struct regstat *i_regs)
+{
+  int next, latency = 0, reader_ahead = 0;
+
+  if (HACK_ENABLED(NDHACK_NO_STALLS))
+    return;
+  if (get_reg(i_regs->regmap, CCREG) != HOST_CCREG)
+    return; // happens occasionally... cc evicted? Don't bother then
+
+  // scan forward until a branch target, an MFHI/MFLO or a jump
+  for (next = i + 1; next < slen && !bt[next]; next++) {
+    reader_ahead = is_mflohi(next);
+    if (reader_ahead)
+      break;
+    if (!is_jump(next))
+      continue;
+    // check ds
+    if (next + 1 < slen)
+      reader_ahead = is_mflohi(next + 1);
+    break;
+  }
+  if (reader_ahead)
+    return; // handle all in multdiv_do_stall()
+
+  check_multdiv(i, &latency);
+  assert(latency > 0);
+  assem_debug("; muldiv prepare stall %d\n", latency);
+  host_tempreg_acquire();
+  emit_addimm(HOST_CCREG, CLOCK_ADJUST(ccadj[i]) + latency, HOST_TEMPREG);
+  emit_writeword(HOST_TEMPREG, &psxRegs.muldivBusyCycle);
+  host_tempreg_release();
+}
+
+// Called for the MFHI/MFLO instruction i. Unless stalls are disabled, emits
+// code that advances the cycle counter to account for a pending mul/div:
+//  - if the producing MULTDIV is found earlier in the same straight-line
+//    run, the remaining latency is a compile-time constant and is simply
+//    added (when positive);
+//  - otherwise a runtime comparison against psxRegs.muldivBusyCycle is
+//    emitted.
+// Emits nothing if an earlier MFHI/MFLO in the run already covered it, or
+// if the cycle counter / a scratch host register is unavailable.
+static void multdiv_do_stall(int i, const struct regstat *i_regs)
+{
+  int prev = i, remaining = 0;
+  u_int hostregs = get_host_reglist(i_regs->regmap);
+  int scratch = get_reg(i_regs->regmap, -1);
+
+  if (scratch < 0)
+    scratch = reglist_find_free(hostregs);
+  if (HACK_ENABLED(NDHACK_NO_STALLS))
+    return;
+  if (get_reg(i_regs->regmap, CCREG) != HOST_CCREG || scratch < 0) {
+    // happens occasionally... cc evicted? Don't bother then
+    //printf("no cc/rtmp %08x\n", start + i*4);
+    return;
+  }
+
+  if (!bt[i]) {
+    // walk backwards looking for the MULTDIV feeding this read
+    prev = i - 1;
+    while (prev >= 0) {
+      if (is_ds[prev])
+        break;
+      if (check_multdiv(prev, &remaining) || bt[prev])
+        break;
+      if (is_mflohi(prev))
+        return; // already handled by this op
+      prev--;
+    }
+    if (prev < 0)
+      prev = 0;
+  }
+
+  if (remaining > 0) {
+    remaining -= CLOCK_ADJUST(ccadj[i] - ccadj[prev]);
+    assem_debug("; muldiv stall resolved %d\n", remaining);
+    if (remaining > 0)
+      emit_addimm(HOST_CCREG, remaining, HOST_CCREG);
+    return;
+  }
+
+  assem_debug("; muldiv stall unresolved\n");
+  host_tempreg_acquire();
+  emit_readword(&psxRegs.muldivBusyCycle, scratch);
+  emit_addimm(scratch, -CLOCK_ADJUST(ccadj[i]), scratch);
+  emit_sub(scratch, HOST_CCREG, HOST_TEMPREG);
+  emit_cmpimm(HOST_TEMPREG, 37);
+  emit_cmovb_reg(scratch, HOST_CCREG);
+  //emit_log_gte_stall(i, 0, reglist);
+  host_tempreg_release();
+}
+
static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
{
switch (copr) {
if (!offset&&!c&&s>=0) ar=s;
assert(ar>=0);
+ cop2_do_stall_check(0, i, i_regs, reglist);
+
if (opcode[i]==0x3a) { // SWC2
- cop2_call_stall_check(0, i, i_regs, reglist_exclude(reglist, tl, -1));
cop2_get_dreg(copr,tl,-1);
type=STOREW_STUB;
}
u_int copr = (source[i]>>11) & 0x1f;
signed char temp = get_reg(i_regs->regmap, -1);
- if (opcode2[i] == 0 || opcode2[i] == 2) { // MFC2/CFC2
- if (!HACK_ENABLED(NDHACK_GTE_NO_STALL)) {
+ if (!HACK_ENABLED(NDHACK_NO_STALLS)) {
+ u_int reglist = reglist_exclude(get_host_reglist(i_regs->regmap), temp, -1);
+ if (opcode2[i] == 0 || opcode2[i] == 2) { // MFC2/CFC2
signed char tl = get_reg(i_regs->regmap, rt1[i]);
- u_int reglist = reglist_exclude(get_host_reglist(i_regs->regmap), tl, temp);
- cop2_call_stall_check(0, i, i_regs, reglist);
+ reglist = reglist_exclude(reglist, tl, -1);
}
+ cop2_do_stall_check(0, i, i_regs, reglist);
}
if (opcode2[i]==0) { // MFC2
signed char tl=get_reg(i_regs->regmap,rt1[i]);
else emit_loadreg(rs1[i],tl);
}
}
+ if (rs1[i] == HIREG || rs1[i] == LOREG) // MFHI/MFLO
+ multdiv_do_stall(i, i_regs);
}
// call interpreter, exception handler, things that change pc/regs/cycles ...
case C2OP:
c2op_assemble(i,i_regs);break;
case MULTDIV:
- multdiv_assemble(i,i_regs);break;
+ multdiv_assemble(i,i_regs);
+ multdiv_prepare_stall(i,i_regs);
+ break;
case MOV:
mov_assemble(i,i_regs);break;
case SYSCALL:
case C2OP:
c2op_assemble(t,&regs[t]);break;
case MULTDIV:
- multdiv_assemble(t,&regs[t]);break;
+ multdiv_assemble(t,&regs[t]);
+ multdiv_prepare_stall(t,&regs[t]); // t, not i: same insn that was just assembled
+ break;
case MOV:
mov_assemble(t,&regs[t]);break;
case SYSCALL:
case C2OP:
c2op_assemble(0,&regs[0]);break;
case MULTDIV:
- multdiv_assemble(0,&regs[0]);break;
+ multdiv_assemble(0,&regs[0]);
+ multdiv_prepare_stall(0,&regs[0]); // stall bookkeeping for the insn just assembled
+ break;
case MOV:
mov_assemble(0,&regs[0]);break;
case SYSCALL:
for(n=0;n<4096;n++) ll_clear(jump_in+n);
for(n=0;n<4096;n++) ll_clear(jump_out+n);
for(n=0;n<4096;n++) ll_clear(jump_dirty+n);
+
+ cycle_multiplier_old = cycle_multiplier;
+ new_dynarec_hacks_old = new_dynarec_hacks;
}
void new_dynarec_init(void)
// this should really be removed since the real stalls have been implemented,
// but doing so causes sizeable perf regression against the older version
u_int gtec = gte_cycletab[source[i] & 0x3f];
- cc += HACK_ENABLED(NDHACK_GTE_NO_STALL) ? gtec/2 : 2;
+ cc += HACK_ENABLED(NDHACK_NO_STALLS) ? gtec/2 : 2;
}
else if(i>1&&itype[i]==STORE&&itype[i-1]==STORE&&itype[i-2]==STORE&&!bt[i])
{
else if(itype[i]==C2LS)
{
// same as with C2OP
- cc += HACK_ENABLED(NDHACK_GTE_NO_STALL) ? 4 : 2;
+ cc += HACK_ENABLED(NDHACK_NO_STALLS) ? 4 : 2;
}
#endif
else
case C2OP:
c2op_assemble(i,&regs[i]);break;
case MULTDIV:
- multdiv_assemble(i,&regs[i]);break;
+ multdiv_assemble(i,&regs[i]);
+ multdiv_prepare_stall(i,&regs[i]); // stall bookkeeping for the insn just assembled
+ break;
case MOV:
mov_assemble(i,&regs[i]);break;
case SYSCALL:
#include "psxhle.h"
#include "debug.h"
#include "psxinterpreter.h"
+#include <assert.h>
static int branch = 0;
static int branch2 = 0;
}
}
+// DIV with stall emulation: marks the mul/div unit busy until 37 cycles
+// from now, then performs the regular division.
+void psxDIV_stall()
+{
+	psxRegs.muldivBusyCycle = 37 + psxRegs.cycle;
+	psxDIV();
+}
+
void psxDIVU() {
if (_rRt_ != 0) {
_rLo_ = _rRs_ / _rRt_;
}
}
+// DIVU with stall emulation: marks the mul/div unit busy until 37 cycles
+// from now, then performs the regular unsigned division.
+void psxDIVU_stall()
+{
+	psxRegs.muldivBusyCycle = 37 + psxRegs.cycle;
+	psxDIVU();
+}
+
void psxMULT() {
u64 res = (s64)((s64)_i32(_rRs_) * (s64)_i32(_rRt_));
psxRegs.GPR.n.hi = (u32)((res >> 32) & 0xffffffff);
}
+// MULT with stall emulation. The busy time is estimated from the leading
+// zero count of the first operand (after folding in its sign bits) and
+// comes out as 15, 11 or 7 cycles; approximate, but maybe good enough.
+void psxMULT_stall()
+{
+	u32 operand = _rRs_;
+	// xor with the arithmetically shifted value, so that negative inputs
+	// are sized by their leading ones; "| 1" keeps clz's argument nonzero
+	u32 folded = (operand ^ ((s32)operand >> 21)) | 1;
+	u32 lead_zeros = __builtin_clz(folded);
+	u32 busy = 7 + 4 * (2 - (lead_zeros / 11));
+
+	psxRegs.muldivBusyCycle = psxRegs.cycle + busy;
+	psxMULT();
+}
+
void psxMULTU() {
u64 res = (u64)((u64)_u32(_rRs_) * (u64)_u32(_rRt_));
psxRegs.GPR.n.hi = (u32)((res >> 32) & 0xffffffff);
}
+// MULTU with stall emulation. The busy time is estimated from the leading
+// zero count of the first operand and comes out as 15, 11 or 7 cycles;
+// approximate, but maybe good enough.
+void psxMULTU_stall()
+{
+	// "| 1" keeps clz's argument nonzero
+	u32 lead_zeros = __builtin_clz(_rRs_ | 1);
+	u32 busy = 7 + 4 * (2 - (lead_zeros / 11));
+
+	psxRegs.muldivBusyCycle = psxRegs.cycle + busy;
+	psxMULTU();
+}
+
/*********************************************************
* Register branch logic *
* Format: OP rs, offset *
void psxMFHI() { if (!_Rd_) return; _rRd_ = _rHi_; } // Rd = Hi
void psxMFLO() { if (!_Rd_) return; _rRd_ = _rLo_; } // Rd = Lo
+// If the mul/div unit is still busy, fast-forwards the cycle counter to the
+// moment it becomes free. The difference is computed unsigned, so a busy
+// cycle that already lies in the past wraps to a large value and is ignored;
+// 37 is the longest busy time the *_stall handlers ever set.
+static void mflohiCheckStall(void)
+{
+	u32 remaining = psxRegs.muldivBusyCycle - psxRegs.cycle;
+
+	if (remaining > 37)
+		return;
+	//printf("muldiv stall %u\n", remaining);
+	psxRegs.cycle = psxRegs.muldivBusyCycle;
+}
+
+// MFHI with stall emulation: wait out a pending mul/div, then read HI.
+void psxMFHI_stall()
+{
+	mflohiCheckStall();
+	psxMFHI();
+}
+
+// MFLO with stall emulation: wait out a pending mul/div, then read LO.
+void psxMFLO_stall()
+{
+	mflohiCheckStall();
+	psxMFLO();
+}
+
/*********************************************************
* Move to GPR to HI/LO & Register jump *
* Format: OP rs *
}
void psxCOP2() {
+ psxCP2[_Funct_]((struct psxCP2Regs *)&psxRegs.CP2D);
+}
+
+void psxCOP2_stall() {
u32 f = _Funct_;
- if (f != 0 || _Rs_ < 4) // not MTC2/CTC2
- gteCheckStall(f);
+ gteCheckStall(f);
psxCP2[f]((struct psxCP2Regs *)&psxRegs.CP2D);
}
#endif
}
+// Re-points the opcode dispatch tables at either the plain handlers or
+// their stall-emulating counterparts, depending on Config.DisableStalls.
+// The asserts check that every slot about to be overwritten currently
+// holds one of the two expected variants.
+void applyConfig() {
+	const int no_stalls = Config.DisableStalls ? 1 : 0;
+
+	assert(psxBSC[18] == psxCOP2 || psxBSC[18] == psxCOP2_stall);
+	assert(psxBSC[50] == gteLWC2 || psxBSC[50] == gteLWC2_stall);
+	assert(psxBSC[58] == gteSWC2 || psxBSC[58] == gteSWC2_stall);
+	assert(psxSPC[16] == psxMFHI || psxSPC[16] == psxMFHI_stall);
+	assert(psxSPC[18] == psxMFLO || psxSPC[18] == psxMFLO_stall);
+	assert(psxSPC[24] == psxMULT || psxSPC[24] == psxMULT_stall);
+	assert(psxSPC[25] == psxMULTU || psxSPC[25] == psxMULTU_stall);
+	assert(psxSPC[26] == psxDIV || psxSPC[26] == psxDIV_stall);
+	assert(psxSPC[27] == psxDIVU || psxSPC[27] == psxDIVU_stall);
+
+	// primary opcode table: COP2, LWC2, SWC2
+	psxBSC[18] = no_stalls ? psxCOP2 : psxCOP2_stall;
+	psxBSC[50] = no_stalls ? gteLWC2 : gteLWC2_stall;
+	psxBSC[58] = no_stalls ? gteSWC2 : gteSWC2_stall;
+
+	// SPECIAL table: MFHI, MFLO, MULT, MULTU, DIV, DIVU
+	psxSPC[16] = no_stalls ? psxMFHI : psxMFHI_stall;
+	psxSPC[18] = no_stalls ? psxMFLO : psxMFLO_stall;
+	psxSPC[24] = no_stalls ? psxMULT : psxMULT_stall;
+	psxSPC[25] = no_stalls ? psxMULTU : psxMULTU_stall;
+	psxSPC[26] = no_stalls ? psxDIV : psxDIV_stall;
+	psxSPC[27] = no_stalls ? psxDIVU : psxDIVU_stall;
+}
+
static void intShutdown() {
#ifdef ICACHE_EMULATION
if (ICache_Addr)
#ifdef ICACHE_EMULATION
intNotify,
#endif
+ applyConfig,
intShutdown
};