From 21e24294686e369064a34e9ec807cc9caaf1aa3e Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 22 Jan 2025 23:26:54 +0200 Subject: [PATCH] drc: handle gte stalls closer to the interpreter --- libpcsxcore/gte.c | 4 +-- libpcsxcore/gte.h | 1 - libpcsxcore/new_dynarec/linkage_arm.S | 13 --------- libpcsxcore/new_dynarec/linkage_arm64.S | 13 --------- libpcsxcore/new_dynarec/new_dynarec.c | 36 ++++++------------------- 5 files changed, 10 insertions(+), 57 deletions(-) diff --git a/libpcsxcore/gte.c b/libpcsxcore/gte.c index 991a4452..bdc8fa52 100644 --- a/libpcsxcore/gte.c +++ b/libpcsxcore/gte.c @@ -283,8 +283,8 @@ const unsigned char gte_cycletab[64] = { 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 39, }; -// warning: called by the dynarec -int gteCheckStallRaw(u32 op_cycles, psxRegisters *regs) { +// warning: ari64 drc stores its negative cycles in gteBusyCycle +static int gteCheckStallRaw(u32 op_cycles, psxRegisters *regs) { u32 left = regs->gteBusyCycle - regs->cycle; int stall = 0; diff --git a/libpcsxcore/gte.h b/libpcsxcore/gte.h index f1dcc66a..70ec9fe1 100644 --- a/libpcsxcore/gte.h +++ b/libpcsxcore/gte.h @@ -69,7 +69,6 @@ struct psxCP2Regs; extern const unsigned char gte_cycletab[64]; -int gteCheckStallRaw(u32 op_cycles, psxRegisters *regs); void gteCheckStall(u32 op); u32 MFC2(struct psxCP2Regs *regs, int reg); diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index 39afc88e..9ac9e05d 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -31,7 +31,6 @@ #define ndrc_get_addr_ht_param ESYM(ndrc_get_addr_ht_param) #define ndrc_write_invalidate_one ESYM(ndrc_write_invalidate_one) #define gen_interupt ESYM(gen_interupt) -#define gteCheckStallRaw ESYM(gteCheckStallRaw) #define psxException ESYM(psxException) #define execI ESYM(execI) #endif @@ -637,18 +636,6 @@ FUNCTION(rcnt2_read_count_m1): lsr r0, #16 @ /= 8 bx lr -FUNCTION(call_gteStall): - /* r0 = op_cycles, r1 = cycles */ 
- ldr r2, [fp, #LO_last_count] - str lr, [fp, #LO_saved_lr] - add r1, r1, r2 - str r1, [fp, #LO_cycle] - add r1, fp, #LO_psxRegs - bl gteCheckStallRaw - ldr lr, [fp, #LO_saved_lr] - add r10, r10, r0 - bx lr - #ifdef HAVE_ARMV6 FUNCTION(get_reg): diff --git a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S index fb961cca..47aa39c7 100644 --- a/libpcsxcore/new_dynarec/linkage_arm64.S +++ b/libpcsxcore/new_dynarec/linkage_arm64.S @@ -29,7 +29,6 @@ #define ndrc_add_jump_out ESYM(ndrc_add_jump_out) #define ndrc_get_addr_ht ESYM(ndrc_get_addr_ht) #define gen_interupt ESYM(gen_interupt) -#define gteCheckStallRaw ESYM(gteCheckStallRaw) #define psxException ESYM(psxException) #define execI ESYM(execI) #endif @@ -382,18 +381,6 @@ jump_handle_swx_interp: /* almost never happens */ bl execI b jump_to_new_pc -FUNCTION(call_gteStall): - /* w0 = op_cycles, w1 = cycles */ - ldr w2, [rFP, #LO_last_count] - str lr, [rFP, #LO_saved_lr] - add w1, w1, w2 - str w1, [rFP, #LO_cycle] - add x1, rFP, #LO_psxRegs - bl gteCheckStallRaw - ldr lr, [rFP, #LO_saved_lr] - add rCC, rCC, w0 - ret - #ifdef DRC_DBG #undef do_insn_cmp FUNCTION(do_insn_cmp_arm64): diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 86333a86..d1450515 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -406,7 +406,6 @@ void jump_overflow_ds(u_int u0, u_int u1, u_int pc); void jump_addrerror (u_int cause, u_int addr, u_int pc); void jump_addrerror_ds(u_int cause, u_int addr, u_int pc); void jump_to_new_pc(); -void call_gteStall(); void new_dyna_leave(); void *ndrc_get_addr_ht(u_int vaddr, struct ht_entry *ht); @@ -1296,7 +1295,6 @@ static const struct { FUNCNAME(jump_overflow_ds), FUNCNAME(jump_addrerror), FUNCNAME(jump_addrerror_ds), - FUNCNAME(call_gteStall), FUNCNAME(new_dyna_leave), FUNCNAME(pcsx_mtc0), FUNCNAME(pcsx_mtc0_ds), @@ -3671,11 +3669,7 @@ static void rfe_assemble(int i, const struct 
regstat *i_regs) static int cop2_is_stalling_op(int i, int *cycles) { - if (dops[i].opcode == 0x3a) { // SWC2 - *cycles = 0; - return 1; - } - if (dops[i].itype == COP2 && (dops[i].opcode2 == 0 || dops[i].opcode2 == 2)) { // MFC2/CFC2 + if (dops[i].itype == COP2 || dops[i].itype == C2LS) { *cycles = 0; return 1; } @@ -3709,7 +3703,7 @@ static void emit_log_gte_stall(int i, int stall, u_int reglist) static void cop2_do_stall_check(u_int op, int i, const struct regstat *i_regs, u_int reglist) { - int j = i, other_gte_op_cycles = -1, stall = -MAXBLOCK, cycles_passed; + int j = i, cycles, other_gte_op_cycles = -1, stall = -MAXBLOCK, cycles_passed; int rtmp = reglist_find_free(reglist); if (HACK_ENABLED(NDHACK_NO_STALLS)) @@ -3733,17 +3727,11 @@ static void cop2_do_stall_check(u_int op, int i, const struct regstat *i_regs, u if (other_gte_op_cycles >= 0) stall = other_gte_op_cycles - cycles_passed; else if (cycles_passed >= 44) - stall = 0; // can't stall + stall = 0; // can't possibly stall if (stall == -MAXBLOCK && rtmp >= 0) { // unknown stall, do the expensive runtime check assem_debug("; cop2_do_stall_check\n"); -#if 0 // too slow - save_regs(reglist); - emit_movimm(gte_cycletab[op], 0); - emit_addimm(HOST_CCREG, cinfo[i].ccadj, 1); - emit_far_call(call_gteStall); - restore_regs(reglist); -#else + // busy - (cc + adj) -> busy - adj - cc host_tempreg_acquire(); emit_readword(&psxRegs.gteBusyCycle, rtmp); emit_addimm(rtmp, -cinfo[i].ccadj, rtmp); @@ -3752,7 +3740,6 @@ static void cop2_do_stall_check(u_int op, int i, const struct regstat *i_regs, u emit_cmovb_reg(rtmp, HOST_CCREG); //emit_log_gte_stall(i, 0, reglist); host_tempreg_release(); -#endif } else if (stall > 0) { //emit_log_gte_stall(i, stall, reglist); @@ -3760,7 +3747,8 @@ static void cop2_do_stall_check(u_int op, int i, const struct regstat *i_regs, u } // save gteBusyCycle, if needed - if (gte_cycletab[op] == 0) + cycles = gte_cycletab[op]; + if (cycles == 0) return; other_gte_op_cycles = -1; for (j = i 
+ 1; j < slen; j++) { @@ -3777,20 +3765,12 @@ static void cop2_do_stall_check(u_int op, int i, const struct regstat *i_regs, u // will handle stall when assembling that op return; cycles_passed = cinfo[min(j, slen -1)].ccadj - cinfo[i].ccadj; - if (cycles_passed >= 44) + if (cycles_passed >= cycles) return; assem_debug("; save gteBusyCycle\n"); host_tempreg_acquire(); -#if 0 - emit_readword(&last_count, HOST_TEMPREG); - emit_add(HOST_TEMPREG, HOST_CCREG, HOST_TEMPREG); - emit_addimm(HOST_TEMPREG, cinfo[i].ccadj, HOST_TEMPREG); - emit_addimm(HOST_TEMPREG, gte_cycletab[op]), HOST_TEMPREG); - emit_writeword(HOST_TEMPREG, &psxRegs.gteBusyCycle); -#else - emit_addimm(HOST_CCREG, cinfo[i].ccadj + gte_cycletab[op], HOST_TEMPREG); + emit_addimm(HOST_CCREG, cinfo[i].ccadj + cycles, HOST_TEMPREG); emit_writeword(HOST_TEMPREG, &psxRegs.gteBusyCycle); -#endif host_tempreg_release(); } -- 2.39.5