From 4cc373dd225bc4c0db251b3689571e97a1e4ec5a Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 9 Sep 2022 22:11:30 +0300 Subject: [PATCH] psxinterpreter: reduce the use of globals they induce penalties with -fPIC --- libpcsxcore/gte.c | 60 +-- libpcsxcore/gte.h | 11 +- libpcsxcore/new_dynarec/emu_if.c | 11 +- libpcsxcore/new_dynarec/new_dynarec.c | 2 +- libpcsxcore/psxinterpreter.c | 614 ++++++++++++++------------ libpcsxcore/psxinterpreter.h | 10 +- libpcsxcore/r3000a.c | 5 +- libpcsxcore/r3000a.h | 73 --- 8 files changed, 364 insertions(+), 422 deletions(-) diff --git a/libpcsxcore/gte.c b/libpcsxcore/gte.c index 6b3b299f..03261ee2 100644 --- a/libpcsxcore/gte.c +++ b/libpcsxcore/gte.c @@ -301,8 +301,7 @@ void gteCheckStall(u32 op) { gteCheckStallRaw(gte_cycletab[op], &psxRegs); } -static inline u32 MFC2(int reg) { - psxCP2Regs *regs = &psxRegs.CP2; +u32 MFC2(struct psxCP2Regs *regs, int reg) { switch (reg) { case 1: case 3: @@ -311,7 +310,7 @@ static inline u32 MFC2(int reg) { case 9: case 10: case 11: - psxRegs.CP2D.r[reg] = (s32)psxRegs.CP2D.p[reg].sw.l; + regs->CP2D.r[reg] = (s32)regs->CP2D.p[reg].sw.l; break; case 7: @@ -319,25 +318,24 @@ static inline u32 MFC2(int reg) { case 17: case 18: case 19: - psxRegs.CP2D.r[reg] = (u32)psxRegs.CP2D.p[reg].w.l; + regs->CP2D.r[reg] = (u32)regs->CP2D.p[reg].w.l; break; case 15: - psxRegs.CP2D.r[reg] = gteSXY2; + regs->CP2D.r[reg] = gteSXY2; break; case 28: case 29: - psxRegs.CP2D.r[reg] = LIM(gteIR1 >> 7, 0x1f, 0, 0) | + regs->CP2D.r[reg] = LIM(gteIR1 >> 7, 0x1f, 0, 0) | (LIM(gteIR2 >> 7, 0x1f, 0, 0) << 5) | (LIM(gteIR3 >> 7, 0x1f, 0, 0) << 10); break; } - return psxRegs.CP2D.r[reg]; + return regs->CP2D.r[reg]; } -static inline void MTC2(u32 value, int reg) { - psxCP2Regs *regs = &psxRegs.CP2; +void MTC2(struct psxCP2Regs *regs, u32 value, int reg) { switch (reg) { case 15: gteSXY0 = gteSXY1; @@ -379,11 +377,11 @@ static inline void MTC2(u32 value, int reg) { return; default: - psxRegs.CP2D.r[reg] = value; + regs->CP2D.r[reg] = value; } } -static inline void CTC2(u32 value, int reg) { +void CTC2(struct psxCP2Regs *regs, u32 value, int reg) { switch (reg) { case 4: case 12: @@ -401,45 +399,7 @@ static inline void CTC2(u32 value, int reg) { break; } - psxRegs.CP2C.r[reg] = value; -} - -void gteMFC2() { - if (!_Rt_) return; - psxRegs.GPR.r[_Rt_] = MFC2(_Rd_); -} - -void gteCFC2() { - if (!_Rt_) return; - psxRegs.GPR.r[_Rt_] = psxRegs.CP2C.r[_Rd_]; -} - -void gteMTC2() { - MTC2(psxRegs.GPR.r[_Rt_], _Rd_); -} - -void gteCTC2() { - CTC2(psxRegs.GPR.r[_Rt_], _Rd_); -} - -#define _oB_ (psxRegs.GPR.r[_Rs_] + _Imm_) - -void gteLWC2() { - MTC2(psxMemRead32(_oB_), _Rt_); -} - -void gteSWC2() { - psxMemWrite32(_oB_, MFC2(_Rt_)); -} - -void gteLWC2_stall() { - gteCheckStall(0); - gteLWC2(); -} - -void gteSWC2_stall() { - gteCheckStall(0); - gteSWC2(); + regs->CP2C.r[reg] = value; } #endif // FLAGLESS diff --git a/libpcsxcore/gte.h b/libpcsxcore/gte.h index 75e9e5b3..f1dcc66a 100644 --- a/libpcsxcore/gte.h +++ b/libpcsxcore/gte.h @@ -72,14 +72,9 @@ extern const unsigned char gte_cycletab[64]; int gteCheckStallRaw(u32 op_cycles, psxRegisters *regs); void gteCheckStall(u32 op); -void gteMFC2(); -void gteCFC2(); -void gteMTC2(); -void gteCTC2(); -void gteLWC2(); -void gteSWC2(); -void gteLWC2_stall(); -void gteSWC2_stall(); +u32 MFC2(struct psxCP2Regs *regs, int reg); +void MTC2(struct psxCP2Regs *regs, u32 value, int reg); +void CTC2(struct psxCP2Regs *regs, u32 value, int reg); void gteRTPS(struct psxCP2Regs *regs); void gteOP(struct psxCP2Regs *regs); diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index bf64e0f0..aa093564 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -105,13 +105,10 @@ void gen_interupt() next_interupt, next_interupt - psxRegs.cycle); } -// from interpreter -extern void MTC0(int reg, u32 val); - void pcsx_mtc0(u32 reg, u32 val) { evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle); - MTC0(reg, val); + MTC0(&psxRegs, reg, val); gen_interupt(); if (Cause & Status & 0x0300) // possible sw irq pending_exception = 1; @@ -120,7 +117,7 @@ void pcsx_mtc0(u32 reg, u32 val) void pcsx_mtc0_ds(u32 reg, u32 val) { evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle); - MTC0(reg, val); + MTC0(&psxRegs, reg, val); } void new_dyna_before_save(void) @@ -299,15 +296,13 @@ const uint64_t gte_reg_writes[64] = { static int ari64_init() { static u32 scratch_buf[8*8*2] __attribute__((aligned(64))); - extern void (*psxCP2[64])(); - extern void psxNULL(); size_t i; new_dynarec_init(); new_dyna_pcsx_mem_init(); for (i = 0; i < ARRAY_SIZE(gte_handlers); i++) - if (psxCP2[i] != psxNULL) + if (psxCP2[i] != gteNULL) gte_handlers[i] = psxCP2[i]; #if defined(__arm__) && !defined(DRC_DBG) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 1c708566..2b57e59d 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -4152,7 +4152,7 @@ static void syscall_assemble(int i, const struct regstat *i_regs, int ccadj_) static void hlecall_assemble(int i, const struct regstat *i_regs, int ccadj_) { - void *hlefunc = psxNULL; + void *hlefunc = gteNULL; uint32_t hleCode = source[i] & 0x03ffffff; if (hleCode < ARRAY_SIZE(psxHLEt)) hlefunc = psxHLEt[hleCode]; diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c index e7e32690..ea20cab9 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -26,6 +26,7 @@ #include "gte.h" #include "psxhle.h" #include "psxinterpreter.h" +#include #include //#include "debug.h" #define ProcessDebug() @@ -42,18 +43,26 @@ static u32 branchPC; #define debugI() #endif +#ifdef __i386__ +#define INT_ATTR __attribute__((regparm(2))) +#else +#define INT_ATTR +#endif +#ifndef INVALID_PTR +#define INVALID_PTR NULL +#endif + // Subsets -void (*psxBSC[64])(); -void (*psxSPC[64])(); -void (*psxREG[32])(); -void (*psxCP0[32])(); -void (*psxCP2[64])(struct psxCP2Regs *regs); -void (*psxCP2BSC[32])(); - -static u32 fetchNoCache(u32 pc) +static void (INT_ATTR *psxBSC[64])(psxRegisters *regs_, u32 code); +static void (INT_ATTR *psxSPC[64])(psxRegisters *regs_, u32 code); + +static u32 INT_ATTR fetchNoCache(u8 **memRLUT, u32 pc) { - u32 *code = (u32 *)PSXM(pc); - return ((code == NULL) ? 0 : SWAP32(*code)); + u8 *base = memRLUT[pc >> 16]; + if (base == INVALID_PTR) + return 0; + u32 *code = (u32 *)(base + (pc & 0xfffc)); + return SWAP32(*code); } /* @@ -65,7 +74,7 @@ static struct cache_entry { u32 data[4]; } ICache[256]; -static u32 fetchICache(u32 pc) +static u32 INT_ATTR fetchICache(u8 **memRLUT, u32 pc) { // cached? if (pc < 0xa0000000) @@ -75,9 +84,11 @@ static u32 fetchICache(u32 pc) if (((entry->tag ^ pc) & 0xfffffff0) != 0 || pc < entry->tag) { - u32 *code = (u32 *)PSXM(pc & ~0x0f); - if (!code) + const u8 *base = memRLUT[pc >> 16]; + const u32 *code; + if (base == INVALID_PTR) return 0; + code = (u32 *)(base + (pc & 0xfff0)); entry->tag = pc; // treat as 4 words, although other configurations are said to be possible @@ -92,10 +103,10 @@ static u32 fetchICache(u32 pc) return entry->data[(pc & 0x0f) >> 2]; } - return fetchNoCache(pc); + return fetchNoCache(memRLUT, pc); } -u32 (*fetch)(u32 pc) = fetchNoCache; +static u32 (INT_ATTR *fetch)(u8 **memRLUT, u32 pc) = fetchNoCache; static void delayRead(int reg, u32 bpc) { u32 rold, rnew; @@ -103,7 +114,7 @@ static void delayRead(int reg, u32 bpc) { // SysPrintf("delayRead at %x!\n", psxRegs.pc); rold = psxRegs.GPR.r[reg]; - psxBSC[psxRegs.code >> 26](); // branch delay load + psxBSC[psxRegs.code >> 26](&psxRegs, psxRegs.code); // branch delay load rnew = psxRegs.GPR.r[reg]; psxRegs.pc = bpc; @@ -126,7 +137,7 @@ static void delayWrite(int reg, u32 bpc) { // no changes from normal behavior - psxBSC[psxRegs.code >> 26](); + psxBSC[psxRegs.code >> 26](&psxRegs, psxRegs.code); branch = 0; psxRegs.pc = bpc; @@ -146,6 +157,50 @@ static void delayReadWrite(int reg, u32 bpc) { psxBranchTest(); } +/**** R3000A Instruction Macros ****/ +#define _PC_ regs_->pc // The next PC to be executed + +#define _fOp_(code) ((code >> 26) ) // The opcode part of the instruction register +#define _fFunct_(code) ((code ) & 0x3F) // The funct part of the instruction register +#define _fRd_(code) ((code >> 11) & 0x1F) // The rd part of the instruction register +#define _fRt_(code) ((code >> 16) & 0x1F) // The rt part of the instruction register +#define _fRs_(code) ((code >> 21) & 0x1F) // The rs part of the instruction register +#define _fSa_(code) ((code >> 6) & 0x1F) // The sa part of the instruction register +#define _fIm_(code) ((u16)code) // The immediate part of the instruction register +#define _fTarget_(code) (code & 0x03ffffff) // The target part of the instruction register + +#define _fImm_(code) ((s16)code) // sign-extended immediate +#define _fImmU_(code) (code&0xffff) // zero-extended immediate + +#define _Op_ _fOp_(code) +#define _Funct_ _fFunct_(code) +#define _Rd_ _fRd_(code) +#define _Rt_ _fRt_(code) +#define _Rs_ _fRs_(code) +#define _Sa_ _fSa_(code) +#define _Im_ _fIm_(code) +#define _Target_ _fTarget_(code) + +#define _Imm_ _fImm_(code) +#define _ImmU_ _fImmU_(code) + +#define _rRs_ regs_->GPR.r[_Rs_] // Rs register +#define _rRt_ regs_->GPR.r[_Rt_] // Rt register +#define _rRd_ regs_->GPR.r[_Rd_] // Rd register +#define _rSa_ regs_->GPR.r[_Sa_] // Sa register +#define _rFs_ regs_->CP0.r[_Rd_] // Fs register + +#define _rHi_ regs_->GPR.n.hi // The HI register +#define _rLo_ regs_->GPR.n.lo // The LO register + +#define _JumpTarget_ ((_Target_ * 4) + (_PC_ & 0xf0000000)) // Calculates the target during a jump instruction +#define _BranchTarget_ ((s16)_Im_ * 4 + _PC_) // Calculates the target during a branch instruction + +#define _SetLink(x) regs_->GPR.r[x] = _PC_ + 4; // Sets the return address in the link register + +#define OP(name) \ + static inline INT_ATTR void name(psxRegisters *regs_, u32 code) + // this defines shall be used with the tmp // of the next func (instead of _Funct_...) #define _tFunct_ ((tmp ) & 0x3F) // The funct part of the instruction register @@ -154,7 +209,10 @@ static void delayReadWrite(int reg, u32 bpc) { #define _tRs_ ((tmp >> 21) & 0x1F) // The rs part of the instruction register #define _tSa_ ((tmp >> 6) & 0x1F) // The sa part of the instruction register -int psxTestLoadDelay(int reg, u32 tmp) { +#define _i32(x) (s32)(x) +#define _u32(x) (u32)(x) + +static int psxTestLoadDelay(int reg, u32 tmp) { if (tmp == 0) return 0; // NOP switch (tmp >> 26) { case 0x00: // SPECIAL @@ -310,8 +368,8 @@ int psxTestLoadDelay(int reg, u32 tmp) { return 0; } -void psxDelayTest(int reg, u32 bpc) { - u32 tmp = fetch(bpc); +static void psxDelayTest(int reg, u32 bpc) { + u32 tmp = fetch(psxMemRLUT, bpc); branch = 1; switch (psxTestLoadDelay(reg, tmp)) { @@ -322,7 +380,7 @@ void psxDelayTest(int reg, u32 bpc) { case 3: delayWrite(reg, bpc); return; } - psxBSC[psxRegs.code >> 26](); + psxBSC[psxRegs.code >> 26](&psxRegs, psxRegs.code); branch = 0; psxRegs.pc = bpc; @@ -330,10 +388,10 @@ void psxDelayTest(int reg, u32 bpc) { psxBranchTest(); } -static u32 psxBranchNoDelay(void) { - u32 temp; +static u32 psxBranchNoDelay(psxRegisters *regs_) { + u32 temp, code; - psxRegs.code = fetch(psxRegs.pc); + regs_->code = code = fetch(psxMemRLUT, regs_->pc); switch (_Op_) { case 0x00: // SPECIAL switch (_Funct_) { @@ -408,7 +466,7 @@ static int psxDelayBranchExec(u32 tar) { static int psxDelayBranchTest(u32 tar1) { u32 tar2, tmp1, tmp2; - tar2 = psxBranchNoDelay(); + tar2 = psxBranchNoDelay(&psxRegs); if (tar2 == (u32)-1) return 0; @@ -421,7 +479,7 @@ static int psxDelayBranchTest(u32 tar1) { * has no normal delay slot, instruction at tar1 was fetched instead) */ psxRegs.pc = tar1; - tmp1 = psxBranchNoDelay(); + tmp1 = psxBranchNoDelay(&psxRegs); if (tmp1 == (u32)-1) { return psxDelayBranchExec(tar2); } @@ -434,7 +492,7 @@ static int psxDelayBranchTest(u32 tar1) { * - jump to target of that branch (tmp1) */ psxRegs.pc = tar2; - tmp2 = psxBranchNoDelay(); + tmp2 = psxBranchNoDelay(&psxRegs); if (tmp2 == (u32)-1) { return psxDelayBranchExec(tmp1); } @@ -451,7 +509,7 @@ static int psxDelayBranchTest(u32 tar1) { } static void doBranch(u32 tar) { - u32 tmp; + u32 tmp, code; branch2 = branch = 1; branchPC = tar; @@ -460,7 +518,7 @@ static void doBranch(u32 tar) { if (psxDelayBranchTest(tar)) return; - psxRegs.code = fetch(psxRegs.pc); + psxRegs.code = code = fetch(psxMemRLUT, psxRegs.pc); debugI(); @@ -501,7 +559,7 @@ static void doBranch(u32 tar) { break; } - psxBSC[psxRegs.code >> 26](); + psxBSC[psxRegs.code >> 26](&psxRegs, psxRegs.code); branch = 0; psxRegs.pc = branchPC; @@ -513,107 +571,104 @@ static void doBranch(u32 tar) { * Arithmetic with immediate operand * * Format: OP rt, rs, immediate * *********************************************************/ -void psxADDI() { if (!_Rt_) return; _rRt_ = _u32(_rRs_) + _Imm_ ; } // Rt = Rs + Im (Exception on Integer Overflow) -void psxADDIU() { if (!_Rt_) return; _rRt_ = _u32(_rRs_) + _Imm_ ; } // Rt = Rs + Im -void psxANDI() { if (!_Rt_) return; _rRt_ = _u32(_rRs_) & _ImmU_; } // Rt = Rs And Im -void psxORI() { if (!_Rt_) return; _rRt_ = _u32(_rRs_) | _ImmU_; } // Rt = Rs Or Im -void psxXORI() { if (!_Rt_) return; _rRt_ = _u32(_rRs_) ^ _ImmU_; } // Rt = Rs Xor Im -void psxSLTI() { if (!_Rt_) return; _rRt_ = _i32(_rRs_) < _Imm_ ; } // Rt = Rs < Im (Signed) -void psxSLTIU() { if (!_Rt_) return; _rRt_ = _u32(_rRs_) < ((u32)_Imm_); } // Rt = Rs < Im (Unsigned) +OP(psxADDI) { if (!_Rt_) return; _rRt_ = _u32(_rRs_) + _Imm_ ; } // Rt = Rs + Im (Exception on Integer Overflow) +OP(psxADDIU) { if (!_Rt_) return; _rRt_ = _u32(_rRs_) + _Imm_ ; } // Rt = Rs + Im +OP(psxANDI) { if (!_Rt_) return; _rRt_ = _u32(_rRs_) & _ImmU_; } // Rt = Rs And Im +OP(psxORI) { if (!_Rt_) return; _rRt_ = _u32(_rRs_) | _ImmU_; } // Rt = Rs Or Im +OP(psxXORI) { if (!_Rt_) return; _rRt_ = _u32(_rRs_) ^ _ImmU_; } // Rt = Rs Xor Im +OP(psxSLTI) { if (!_Rt_) return; _rRt_ = _i32(_rRs_) < _Imm_ ; } // Rt = Rs < Im (Signed) +OP(psxSLTIU) { if (!_Rt_) return; _rRt_ = _u32(_rRs_) < ((u32)_Imm_); } // Rt = Rs < Im (Unsigned) /********************************************************* * Register arithmetic * * Format: OP rd, rs, rt * *********************************************************/ -void psxADD() { if (!_Rd_) return; _rRd_ = _u32(_rRs_) + _u32(_rRt_); } // Rd = Rs + Rt (Exception on Integer Overflow) -void psxADDU() { if (!_Rd_) return; _rRd_ = _u32(_rRs_) + _u32(_rRt_); } // Rd = Rs + Rt -void psxSUB() { if (!_Rd_) return; _rRd_ = _u32(_rRs_) - _u32(_rRt_); } // Rd = Rs - Rt (Exception on Integer Overflow) -void psxSUBU() { if (!_Rd_) return; _rRd_ = _u32(_rRs_) - _u32(_rRt_); } // Rd = Rs - Rt -void psxAND() { if (!_Rd_) return; _rRd_ = _u32(_rRs_) & _u32(_rRt_); } // Rd = Rs And Rt -void psxOR() { if (!_Rd_) return; _rRd_ = _u32(_rRs_) | _u32(_rRt_); } // Rd = Rs Or Rt -void psxXOR() { if (!_Rd_) return; _rRd_ = _u32(_rRs_) ^ _u32(_rRt_); } // Rd = Rs Xor Rt -void psxNOR() { if (!_Rd_) return; _rRd_ =~(_u32(_rRs_) | _u32(_rRt_)); }// Rd = Rs Nor Rt -void psxSLT() { if (!_Rd_) return; _rRd_ = _i32(_rRs_) < _i32(_rRt_); } // Rd = Rs < Rt (Signed) -void psxSLTU() { if (!_Rd_) return; _rRd_ = _u32(_rRs_) < _u32(_rRt_); } // Rd = Rs < Rt (Unsigned) +OP(psxADD) { if (!_Rd_) return; _rRd_ = _u32(_rRs_) + _u32(_rRt_); } // Rd = Rs + Rt (Exception on Integer Overflow) +OP(psxADDU) { if (!_Rd_) return; _rRd_ = _u32(_rRs_) + _u32(_rRt_); } // Rd = Rs + Rt +OP(psxSUB) { if (!_Rd_) return; _rRd_ = _u32(_rRs_) - _u32(_rRt_); } // Rd = Rs - Rt (Exception on Integer Overflow) +OP(psxSUBU) { if (!_Rd_) return; _rRd_ = _u32(_rRs_) - _u32(_rRt_); } // Rd = Rs - Rt +OP(psxAND) { if (!_Rd_) return; _rRd_ = _u32(_rRs_) & _u32(_rRt_); } // Rd = Rs And Rt +OP(psxOR) { if (!_Rd_) return; _rRd_ = _u32(_rRs_) | _u32(_rRt_); } // Rd = Rs Or Rt +OP(psxXOR) { if (!_Rd_) return; _rRd_ = _u32(_rRs_) ^ _u32(_rRt_); } // Rd = Rs Xor Rt +OP(psxNOR) { if (!_Rd_) return; _rRd_ =~(_u32(_rRs_) | _u32(_rRt_)); }// Rd = Rs Nor Rt +OP(psxSLT) { if (!_Rd_) return; _rRd_ = _i32(_rRs_) < _i32(_rRt_); } // Rd = Rs < Rt (Signed) +OP(psxSLTU) { if (!_Rd_) return; _rRd_ = _u32(_rRs_) < _u32(_rRt_); } // Rd = Rs < Rt (Unsigned) /********************************************************* * Register mult/div & Register trap logic * * Format: OP rs, rt * *********************************************************/ -void psxDIV() { - if (!_i32(_rRt_)) { - _i32(_rHi_) = _i32(_rRs_); - if (_i32(_rRs_) & 0x80000000) { - _i32(_rLo_) = 1; - } else { - _i32(_rLo_) = 0xFFFFFFFF; - } -/* - * Notaz said that this was "not needed" for ARM platforms and could slow it down so let's disable for ARM. - * This fixes a crash issue that can happen when running Amidog's CPU test. - * (It still stays stuck to a black screen but at least it doesn't crash anymore) - */ +OP(psxDIV) { + if (!_rRt_) { + _rHi_ = _rRs_; + if (_rRs_ & 0x80000000) { + _rLo_ = 1; + } else { + _rLo_ = 0xFFFFFFFF; + } + } #if !defined(__arm__) && !defined(__aarch64__) - } else if (_i32(_rRs_) == 0x80000000 && _i32(_rRt_) == 0xFFFFFFFF) { - _i32(_rLo_) = 0x80000000; - _i32(_rHi_) = 0; + else if (_rRs_ == 0x80000000 && _rRt_ == 0xFFFFFFFF) { + _rLo_ = 0x80000000; + _rHi_ = 0; + } #endif - } else { - _i32(_rLo_) = _i32(_rRs_) / _i32(_rRt_); - _i32(_rHi_) = _i32(_rRs_) % _i32(_rRt_); - } + else { + _rLo_ = _i32(_rRs_) / _i32(_rRt_); + _rHi_ = _i32(_rRs_) % _i32(_rRt_); + } } -void psxDIV_stall() { - psxRegs.muldivBusyCycle = psxRegs.cycle + 37; - psxDIV(); +OP(psxDIV_stall) { + regs_->muldivBusyCycle = regs_->cycle + 37; + psxDIV(regs_, code); } -void psxDIVU() { +OP(psxDIVU) { if (_rRt_ != 0) { _rLo_ = _rRs_ / _rRt_; _rHi_ = _rRs_ % _rRt_; } else { - _i32(_rLo_) = 0xffffffff; - _i32(_rHi_) = _i32(_rRs_); + _rLo_ = 0xffffffff; + _rHi_ = _rRs_; } } -void psxDIVU_stall() { - psxRegs.muldivBusyCycle = psxRegs.cycle + 37; - psxDIVU(); +OP(psxDIVU_stall) { + regs_->muldivBusyCycle = regs_->cycle + 37; + psxDIVU(regs_, code); } -void psxMULT() { - u64 res = (s64)((s64)_i32(_rRs_) * (s64)_i32(_rRt_)); +OP(psxMULT) { + u64 res = (s64)_i32(_rRs_) * _i32(_rRt_); - psxRegs.GPR.n.lo = (u32)(res & 0xffffffff); - psxRegs.GPR.n.hi = (u32)((res >> 32) & 0xffffffff); + regs_->GPR.n.lo = (u32)res; + regs_->GPR.n.hi = (u32)(res >> 32); } -void psxMULT_stall() { +OP(psxMULT_stall) { // approximate, but maybe good enough u32 rs = _rRs_; u32 lz = __builtin_clz(((rs ^ ((s32)rs >> 21)) | 1)); u32 c = 7 + (2 - (lz / 11)) * 4; - psxRegs.muldivBusyCycle = psxRegs.cycle + c; - psxMULT(); + regs_->muldivBusyCycle = regs_->cycle + c; + psxMULT(regs_, code); } -void psxMULTU() { - u64 res = (u64)((u64)_u32(_rRs_) * (u64)_u32(_rRt_)); +OP(psxMULTU) { + u64 res = (u64)_u32(_rRs_) * _u32(_rRt_); - psxRegs.GPR.n.lo = (u32)(res & 0xffffffff); - psxRegs.GPR.n.hi = (u32)((res >> 32) & 0xffffffff); + regs_->GPR.n.lo = (u32)(res & 0xffffffff); + regs_->GPR.n.hi = (u32)((res >> 32) & 0xffffffff); } -void psxMULTU_stall() { +OP(psxMULTU_stall) { // approximate, but maybe good enough u32 lz = __builtin_clz(_rRs_ | 1); u32 c = 7 + (2 - (lz / 11)) * 4; - psxRegs.muldivBusyCycle = psxRegs.cycle + c; - psxMULTU(); + regs_->muldivBusyCycle = regs_->cycle + c; + psxMULTU(regs_, code); } /********************************************************* @@ -623,80 +678,88 @@ void psxMULTU_stall() { #define RepZBranchi32(op) if(_i32(_rRs_) op 0) doBranch(_BranchTarget_); #define RepZBranchLinki32(op) { _SetLink(31); if(_i32(_rRs_) op 0) { doBranch(_BranchTarget_); } } -void psxBGEZ() { RepZBranchi32(>=) } // Branch if Rs >= 0 -void psxBGEZAL() { RepZBranchLinki32(>=) } // Branch if Rs >= 0 and link -void psxBGTZ() { RepZBranchi32(>) } // Branch if Rs > 0 -void psxBLEZ() { RepZBranchi32(<=) } // Branch if Rs <= 0 -void psxBLTZ() { RepZBranchi32(<) } // Branch if Rs < 0 -void psxBLTZAL() { RepZBranchLinki32(<) } // Branch if Rs < 0 and link +OP(psxBGEZ) { RepZBranchi32(>=) } // Branch if Rs >= 0 +OP(psxBGEZAL) { RepZBranchLinki32(>=) } // Branch if Rs >= 0 and link +OP(psxBGTZ) { RepZBranchi32(>) } // Branch if Rs > 0 +OP(psxBLEZ) { RepZBranchi32(<=) } // Branch if Rs <= 0 +OP(psxBLTZ) { RepZBranchi32(<) } // Branch if Rs < 0 +OP(psxBLTZAL) { RepZBranchLinki32(<) } // Branch if Rs < 0 and link /********************************************************* * Shift arithmetic with constant shift * * Format: OP rd, rt, sa * *********************************************************/ -void psxSLL() { if (!_Rd_) return; _u32(_rRd_) = _u32(_rRt_) << _Sa_; } // Rd = Rt << sa -void psxSRA() { if (!_Rd_) return; _i32(_rRd_) = _i32(_rRt_) >> _Sa_; } // Rd = Rt >> sa (arithmetic) -void psxSRL() { if (!_Rd_) return; _u32(_rRd_) = _u32(_rRt_) >> _Sa_; } // Rd = Rt >> sa (logical) +OP(psxSLL) { if (!_Rd_) return; _rRd_ = _u32(_rRt_) << _Sa_; } // Rd = Rt << sa +OP(psxSRA) { if (!_Rd_) return; _rRd_ = _i32(_rRt_) >> _Sa_; } // Rd = Rt >> sa (arithmetic) +OP(psxSRL) { if (!_Rd_) return; _rRd_ = _u32(_rRt_) >> _Sa_; } // Rd = Rt >> sa (logical) /********************************************************* * Shift arithmetic with variant register shift * * Format: OP rd, rt, rs * *********************************************************/ -void psxSLLV() { if (!_Rd_) return; _u32(_rRd_) = _u32(_rRt_) << (_u32(_rRs_) & 0x1F); } // Rd = Rt << rs -void psxSRAV() { if (!_Rd_) return; _i32(_rRd_) = _i32(_rRt_) >> (_u32(_rRs_) & 0x1F); } // Rd = Rt >> rs (arithmetic) -void psxSRLV() { if (!_Rd_) return; _u32(_rRd_) = _u32(_rRt_) >> (_u32(_rRs_) & 0x1F); } // Rd = Rt >> rs (logical) +OP(psxSLLV) { if (!_Rd_) return; _rRd_ = _u32(_rRt_) << (_u32(_rRs_) & 0x1F); } // Rd = Rt << rs +OP(psxSRAV) { if (!_Rd_) return; _rRd_ = _i32(_rRt_) >> (_u32(_rRs_) & 0x1F); } // Rd = Rt >> rs (arithmetic) +OP(psxSRLV) { if (!_Rd_) return; _rRd_ = _u32(_rRt_) >> (_u32(_rRs_) & 0x1F); } // Rd = Rt >> rs (logical) /********************************************************* * Load higher 16 bits of the first word in GPR with imm * * Format: OP rt, immediate * *********************************************************/ -void psxLUI() { if (!_Rt_) return; _u32(_rRt_) = psxRegs.code << 16; } // Upper halfword of Rt = Im +OP(psxLUI) { if (!_Rt_) return; _rRt_ = code << 16; } // Upper halfword of Rt = Im /********************************************************* * Move from HI/LO to GPR * * Format: OP rd * *********************************************************/ -void psxMFHI() { if (!_Rd_) return; _rRd_ = _rHi_; } // Rd = Hi -void psxMFLO() { if (!_Rd_) return; _rRd_ = _rLo_; } // Rd = Lo +OP(psxMFHI) { if (!_Rd_) return; _rRd_ = _rHi_; } // Rd = Hi +OP(psxMFLO) { if (!_Rd_) return; _rRd_ = _rLo_; } // Rd = Lo -static void mflohiCheckStall(void) +static void mflohiCheckStall(psxRegisters *regs_) { - u32 left = psxRegs.muldivBusyCycle - psxRegs.cycle; + u32 left = regs_->muldivBusyCycle - regs_->cycle; if (left <= 37) { //printf("muldiv stall %u\n", left); - psxRegs.cycle = psxRegs.muldivBusyCycle; + regs_->cycle = regs_->muldivBusyCycle; } } -void psxMFHI_stall() { mflohiCheckStall(); psxMFHI(); } -void psxMFLO_stall() { mflohiCheckStall(); psxMFLO(); } +OP(psxMFHI_stall) { mflohiCheckStall(regs_); psxMFHI(regs_, code); } +OP(psxMFLO_stall) { mflohiCheckStall(regs_); psxMFLO(regs_, code); } /********************************************************* * Move to GPR to HI/LO & Register jump * * Format: OP rs * *********************************************************/ -void psxMTHI() { _rHi_ = _rRs_; } // Hi = Rs -void psxMTLO() { _rLo_ = _rRs_; } // Lo = Rs +OP(psxMTHI) { _rHi_ = _rRs_; } // Hi = Rs +OP(psxMTLO) { _rLo_ = _rRs_; } // Lo = Rs /********************************************************* * Special purpose instructions * * Format: OP * *********************************************************/ -void psxBREAK() { - psxRegs.pc -= 4; +OP(psxBREAK) { + regs_->pc -= 4; psxException(0x24, branch); } -void psxSYSCALL() { - psxRegs.pc -= 4; +OP(psxSYSCALL) { + regs_->pc -= 4; psxException(0x20, branch); } -void psxRFE() { +static inline void psxTestSWInts(psxRegisters *regs_) { + if (regs_->CP0.n.Cause & regs_->CP0.n.Status & 0x0300 && + regs_->CP0.n.Status & 0x1) { + regs_->CP0.n.Cause &= ~0x7c; + psxException(regs_->CP0.n.Cause, branch); + } +} + +OP(psxRFE) { // SysPrintf("psxRFE\n"); - psxRegs.CP0.n.Status = (psxRegs.CP0.n.Status & 0xfffffff0) | - ((psxRegs.CP0.n.Status & 0x3c) >> 2); - psxTestSWInts(); + regs_->CP0.n.Status = (regs_->CP0.n.Status & 0xfffffff0) | + ((regs_->CP0.n.Status & 0x3c) >> 2); + psxTestSWInts(regs_); } /********************************************************* @@ -705,26 +768,26 @@ void psxRFE() { *********************************************************/ #define RepBranchi32(op) if(_i32(_rRs_) op _i32(_rRt_)) doBranch(_BranchTarget_); -void psxBEQ() { RepBranchi32(==) } // Branch if Rs == Rt -void psxBNE() { RepBranchi32(!=) } // Branch if Rs != Rt +OP(psxBEQ) { RepBranchi32(==) } // Branch if Rs == Rt +OP(psxBNE) { RepBranchi32(!=) } // Branch if Rs != Rt /********************************************************* * Jump to target * * Format: OP target * *********************************************************/ -void psxJ() { doBranch(_JumpTarget_); } -void psxJAL() { _SetLink(31); doBranch(_JumpTarget_); } +OP(psxJ) { doBranch(_JumpTarget_); } +OP(psxJAL) { _SetLink(31); doBranch(_JumpTarget_); } /********************************************************* * Register jump * * Format: OP rs, rd * *********************************************************/ -void psxJR() { +OP(psxJR) { doBranch(_rRs_ & ~3); psxJumpTest(); } -void psxJALR() { +OP(psxJALR) { u32 temp = _u32(_rRs_); if (_Rd_) { _SetLink(_Rd_); } doBranch(temp & ~3); @@ -735,59 +798,23 @@ void psxJALR() { * Format: OP rt, offset(base) * *********************************************************/ -#define _oB_ (_u32(_rRs_) + _Imm_) - -void psxLB() { - if (_Rt_) { - _i32(_rRt_) = (signed char)psxMemRead8(_oB_); - } else { - psxMemRead8(_oB_); - } -} - -void psxLBU() { - if (_Rt_) { - _u32(_rRt_) = psxMemRead8(_oB_); - } else { - psxMemRead8(_oB_); - } -} - -void psxLH() { - if (_Rt_) { - _i32(_rRt_) = (short)psxMemRead16(_oB_); - } else { - psxMemRead16(_oB_); - } -} - -void psxLHU() { - if (_Rt_) { - _u32(_rRt_) = psxMemRead16(_oB_); - } else { - psxMemRead16(_oB_); - } -} +#define _oB_ (regs_->GPR.r[_Rs_] + _Imm_) -void psxLW() { - if (_Rt_) { - _u32(_rRt_) = psxMemRead32(_oB_); - } else { - psxMemRead32(_oB_); - } -} - -u32 LWL_MASK[4] = { 0xffffff, 0xffff, 0xff, 0 }; -u32 LWL_SHIFT[4] = { 24, 16, 8, 0 }; +OP(psxLB) { u32 v = (s8)psxMemRead8(_oB_); if (_Rt_) _rRt_ = v; } +OP(psxLBU) { u32 v = psxMemRead8(_oB_); if (_Rt_) _rRt_ = v; } +OP(psxLH) { u32 v = (s16)psxMemRead16(_oB_); if (_Rt_) _rRt_ = v; } +OP(psxLHU) { u32 v = psxMemRead16(_oB_); if (_Rt_) _rRt_ = v; } +OP(psxLW) { u32 v = psxMemRead32(_oB_); if (_Rt_) _rRt_ = v; } -void psxLWL() { +OP(psxLWL) { + static const u32 LWL_MASK[4] = { 0xffffff, 0xffff, 0xff, 0 }; + static const u32 LWL_SHIFT[4] = { 24, 16, 8, 0 }; u32 addr = _oB_; u32 shift = addr & 3; u32 mem = psxMemRead32(addr & ~3); if (!_Rt_) return; - _u32(_rRt_) = ( _u32(_rRt_) & LWL_MASK[shift]) | - ( mem << LWL_SHIFT[shift]); + _rRt_ = (_u32(_rRt_) & LWL_MASK[shift]) | (mem << LWL_SHIFT[shift]); /* Mem = 1234. Reg = abcd @@ -799,17 +826,15 @@ void psxLWL() { */ } -u32 LWR_MASK[4] = { 0, 0xff000000, 0xffff0000, 0xffffff00 }; -u32 LWR_SHIFT[4] = { 0, 8, 16, 24 }; - -void psxLWR() { +OP(psxLWR) { + static const u32 LWR_MASK[4] = { 0, 0xff000000, 0xffff0000, 0xffffff00 }; + static const u32 LWR_SHIFT[4] = { 0, 8, 16, 24 }; u32 addr = _oB_; u32 shift = addr & 3; u32 mem = psxMemRead32(addr & ~3); if (!_Rt_) return; - _u32(_rRt_) = ( _u32(_rRt_) & LWR_MASK[shift]) | - ( mem >> LWR_SHIFT[shift]); + _rRt_ = (_u32(_rRt_) & LWR_MASK[shift]) | (mem >> LWR_SHIFT[shift]); /* Mem = 1234. Reg = abcd @@ -821,14 +846,13 @@ void psxLWR() { */ } -void psxSB() { psxMemWrite8 (_oB_, _rRt_ & 0xff); } -void psxSH() { psxMemWrite16(_oB_, _rRt_ & 0xffff); } -void psxSW() { psxMemWrite32(_oB_, _rRt_); } - -u32 SWL_MASK[4] = { 0xffffff00, 0xffff0000, 0xff000000, 0 }; -u32 SWL_SHIFT[4] = { 24, 16, 8, 0 }; +OP(psxSB) { psxMemWrite8 (_oB_, _rRt_ & 0xff); } +OP(psxSH) { psxMemWrite16(_oB_, _rRt_ & 0xffff); } +OP(psxSW) { psxMemWrite32(_oB_, _rRt_); } -void psxSWL() { +OP(psxSWL) { + static const u32 SWL_MASK[4] = { 0xffffff00, 0xffff0000, 0xff000000, 0 }; + static const u32 SWL_SHIFT[4] = { 24, 16, 8, 0 }; u32 addr = _oB_; u32 shift = addr & 3; u32 mem = psxMemRead32(addr & ~3); @@ -845,10 +869,9 @@ void psxSWL() { */ } -u32 SWR_MASK[4] = { 0, 0xff, 0xffff, 0xffffff }; -u32 SWR_SHIFT[4] = { 0, 8, 16, 24 }; - -void psxSWR() { +OP(psxSWR) { + static const u32 SWR_MASK[4] = { 0, 0xff, 0xffff, 0xffffff }; + static const u32 SWR_SHIFT[4] = { 0, 8, 16, 24 }; u32 addr = _oB_; u32 shift = addr & 3; u32 mem = psxMemRead32(addr & ~3); @@ -870,88 +893,139 @@ void psxSWR() { * Moves between GPR and COPx * * Format: OP rt, fs * *********************************************************/ -void psxMFC0() { if (!_Rt_) return; _i32(_rRt_) = (int)_rFs_; } -void psxCFC0() { if (!_Rt_) return; _i32(_rRt_) = (int)_rFs_; } - -void psxTestSWInts() { - if (psxRegs.CP0.n.Cause & psxRegs.CP0.n.Status & 0x0300 && - psxRegs.CP0.n.Status & 0x1) { - psxRegs.CP0.n.Cause &= ~0x7c; - psxException(psxRegs.CP0.n.Cause, branch); - } -} +OP(psxMFC0) { if (!_Rt_) return; _rRt_ = _rFs_; } +OP(psxCFC0) { if (!_Rt_) return; _rRt_ = _rFs_; } -void MTC0(int reg, u32 val) { +void MTC0(psxRegisters *regs_, int reg, u32 val) { // SysPrintf("MTC0 %d: %x\n", reg, val); switch (reg) { case 12: // Status - psxRegs.CP0.r[12] = val; - psxTestSWInts(); + regs_->CP0.r[12] = val; + psxTestSWInts(regs_); break; case 13: // Cause - psxRegs.CP0.n.Cause &= ~0x0300; - psxRegs.CP0.n.Cause |= val & 0x0300; - psxTestSWInts(); + regs_->CP0.n.Cause &= ~0x0300; + regs_->CP0.n.Cause |= val & 0x0300; + psxTestSWInts(regs_); break; default: - psxRegs.CP0.r[reg] = val; + regs_->CP0.r[reg] = val; break; } } -void psxMTC0() { MTC0(_Rd_, _u32(_rRt_)); } -void psxCTC0() { MTC0(_Rd_, _u32(_rRt_)); } +OP(psxMTC0) { MTC0(regs_, _Rd_, _u32(_rRt_)); } +OP(psxCTC0) { MTC0(regs_, _Rd_, _u32(_rRt_)); } /********************************************************* * Unknow instruction (would generate an exception) * * Format: ? * *********************************************************/ -void psxNULL() { +static inline void psxNULL_(void) { #ifdef PSXCPU_LOG PSXCPU_LOG("psx: Unimplemented op %x\n", psxRegs.code); #endif } -void psxSPECIAL() { - psxSPC[_Funct_](); -} +OP(psxNULL) { psxNULL_(); } +void gteNULL(struct psxCP2Regs *regs) { psxNULL_(); } -void psxREGIMM() { - psxREG[_Rt_](); +OP(psxSPECIAL) { + psxSPC[_Funct_](regs_, code); } -void psxCOP0() { - psxCP0[_Rs_](); +OP(psxCOP0) { + switch (_Rs_) { + case 0x00: psxMFC0(regs_, code); break; + case 0x02: psxCFC0(regs_, code); break; + case 0x04: psxMTC0(regs_, code); break; + case 0x06: psxCTC0(regs_, code); break; + case 0x10: psxRFE(regs_, code); break; + default: psxNULL_(); break; + } } -void psxCOP2() { - psxCP2[_Funct_]((struct psxCP2Regs *)&psxRegs.CP2D); +OP(psxCOP2) { + psxCP2[_Funct_](®s_->CP2); } -void psxCOP2_stall() { +OP(psxCOP2_stall) { u32 f = _Funct_; gteCheckStall(f); - psxCP2[f]((struct psxCP2Regs *)&psxRegs.CP2D); + psxCP2[f](®s_->CP2); +} + +OP(gteMFC2) { + if (!_Rt_) return; + regs_->GPR.r[_Rt_] = MFC2(®s_->CP2, _Rd_); +} + +OP(gteCFC2) { + if (!_Rt_) return; + regs_->GPR.r[_Rt_] = regs_->CP2C.r[_Rd_]; +} + +OP(gteMTC2) { + MTC2(®s_->CP2, regs_->GPR.r[_Rt_], _Rd_); +} + +OP(gteCTC2) { + CTC2(®s_->CP2, regs_->GPR.r[_Rt_], _Rd_); +} + +OP(gteLWC2) { + MTC2(®s_->CP2, psxMemRead32(_oB_), _Rt_); +} + +OP(gteSWC2) { + psxMemWrite32(_oB_, MFC2(®s_->CP2, _Rt_)); +} + +OP(gteLWC2_stall) { + gteCheckStall(0); + gteLWC2(regs_, code); } -void psxBASIC(struct psxCP2Regs *regs) { - psxCP2BSC[_Rs_](); +OP(gteSWC2_stall) { + gteCheckStall(0); + gteSWC2(regs_, code); +} + +static void psxBASIC(struct psxCP2Regs *cp2regs) { + psxRegisters *regs_ = (void *)((char *)cp2regs - offsetof(psxRegisters, CP2)); + u32 code = regs_->code; + assert(regs_ == &psxRegs); + switch (_Rs_) { + case 0x00: gteMFC2(regs_, code); break; + case 0x02: gteCFC2(regs_, code); break; + case 0x04: gteMTC2(regs_, code); break; + case 0x06: gteCTC2(regs_, code); break; + default: psxNULL_(); break; + } +} + +OP(psxREGIMM) { + switch (_Rt_) { + case 0x00: psxBLTZ(regs_, code); break; + case 0x01: psxBGEZ(regs_, code); break; + case 0x10: psxBLTZAL(regs_, code); break; + case 0x11: psxBGEZAL(regs_, code); break; + default: psxNULL_(); break; + } } -void psxHLE() { -// psxHLEt[psxRegs.code & 0xffff](); -// psxHLEt[psxRegs.code & 0x07](); // HDHOSHY experimental patch - uint32_t hleCode = psxRegs.code & 0x03ffffff; +OP(psxHLE) { + uint32_t hleCode = code & 0x03ffffff; if (hleCode >= (sizeof(psxHLEt) / sizeof(psxHLEt[0]))) { - psxNULL(); + psxNULL_(); } else { psxHLEt[hleCode](); } } -void (*psxBSC[64])() = { +static void (INT_ATTR *psxBSC[64])(psxRegisters *regs_, u32 code) = { psxSPECIAL, psxREGIMM, psxJ , psxJAL , psxBEQ , psxBNE , psxBLEZ, psxBGTZ, psxADDI , psxADDIU , psxSLTI, psxSLTIU, psxANDI, psxORI , psxXORI, psxLUI , psxCOP0 , psxNULL , psxCOP2, psxNULL , psxNULL, psxNULL, psxNULL, psxNULL, @@ -962,8 +1036,7 @@ void (*psxBSC[64])() = { psxNULL , psxNULL , gteSWC2, psxHLE , psxNULL, psxNULL, psxNULL, psxNULL }; - -void (*psxSPC[64])() = { +static void (INT_ATTR *psxSPC[64])(psxRegisters *regs_, u32 code) = { psxSLL , psxNULL , psxSRL , psxSRA , psxSLLV , psxNULL , psxSRLV, psxSRAV, psxJR , psxJALR , psxNULL, psxNULL, psxSYSCALL, psxBREAK, psxNULL, psxNULL, psxMFHI, psxMTHI , psxMFLO, psxMTLO, psxNULL , psxNULL , psxNULL, psxNULL, @@ -974,39 +1047,17 @@ void (*psxSPC[64])() = { psxNULL, psxNULL , psxNULL, psxNULL, psxNULL , psxNULL , psxNULL, psxNULL }; -void (*psxREG[32])() = { - psxBLTZ , psxBGEZ , psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, - psxNULL , psxNULL , psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, - psxBLTZAL, psxBGEZAL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, - psxNULL , psxNULL , psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL -}; - -void (*psxCP0[32])() = { - psxMFC0, psxNULL, psxCFC0, psxNULL, psxMTC0, psxNULL, psxCTC0, psxNULL, - psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, - psxRFE , psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, - psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL -}; - void (*psxCP2[64])(struct psxCP2Regs *regs) = { - psxBASIC, gteRTPS , psxNULL , psxNULL, psxNULL, psxNULL , gteNCLIP, psxNULL, // 00 - psxNULL , psxNULL , psxNULL , psxNULL, gteOP , psxNULL , psxNULL , psxNULL, // 08 - gteDPCS , gteINTPL, gteMVMVA, gteNCDS, gteCDP , psxNULL , gteNCDT , psxNULL, // 10 - psxNULL , psxNULL , psxNULL , gteNCCS, gteCC , psxNULL , gteNCS , psxNULL, // 18 - gteNCT , psxNULL , psxNULL , psxNULL, psxNULL, psxNULL , psxNULL , psxNULL, // 20 - gteSQR , gteDCPL , gteDPCT , psxNULL, psxNULL, gteAVSZ3, gteAVSZ4, psxNULL, // 28 - gteRTPT , psxNULL , psxNULL , psxNULL, psxNULL, psxNULL , psxNULL , psxNULL, // 30 - psxNULL , psxNULL , psxNULL , psxNULL, psxNULL, gteGPF , gteGPL , gteNCCT // 38 -}; - -void (*psxCP2BSC[32])() = { - gteMFC2, psxNULL, gteCFC2, psxNULL, gteMTC2, psxNULL, gteCTC2, psxNULL, - psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, - psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, - psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL + psxBASIC, gteRTPS , gteNULL , gteNULL, gteNULL, gteNULL , gteNCLIP, gteNULL, // 00 + gteNULL , gteNULL , gteNULL , gteNULL, gteOP , gteNULL , gteNULL , gteNULL, // 08 + gteDPCS , gteINTPL, gteMVMVA, gteNCDS, gteCDP , gteNULL , gteNCDT , gteNULL, // 10 + gteNULL , gteNULL , gteNULL , gteNCCS, gteCC , gteNULL , gteNCS , gteNULL, // 18 + gteNCT , gteNULL , gteNULL , gteNULL, gteNULL, gteNULL , gteNULL , gteNULL, // 20 + gteSQR , gteDCPL , gteDPCT , gteNULL, gteNULL, gteAVSZ3, gteAVSZ4, gteNULL, // 28 + gteRTPT , gteNULL , gteNULL , gteNULL, gteNULL, gteNULL , gteNULL , gteNULL, // 30 + gteNULL , gteNULL , gteNULL , gteNULL, gteNULL, gteGPF , gteGPL , gteNCCT // 38 }; - /////////////////////////////////////////// static int intInit() { @@ -1017,15 +1068,35 @@ static void intReset() { memset(&ICache, 0xff, sizeof(ICache)); } -void intExecute() { +static inline void execI_(u8 **memRLUT, psxRegisters *regs_) { + regs_->code = fetch(memRLUT, regs_->pc); + + debugI(); + + if (Config.Debug) ProcessDebug(); + + regs_->pc += 4; + regs_->cycle += BIAS; + + psxBSC[regs_->code >> 26](regs_, regs_->code); +} + +static void intExecute() { + psxRegisters *regs_ = &psxRegs; + u8 **memRLUT = psxMemRLUT; extern int stop; - for (;!stop;) - execI(); + + while (!stop) + execI_(memRLUT, regs_); } -void intExecuteBlock() { +static void intExecuteBlock() { + psxRegisters *regs_ = &psxRegs; + u8 **memRLUT = psxMemRLUT; + branch2 = 0; - while (!branch2) execI(); + while (!branch2) + execI_(memRLUT, regs_); } static void intClear(u32 Addr, u32 Size) { @@ -1083,18 +1154,9 @@ void intApplyConfig() { static void intShutdown() { } -// interpreter execution +// single step (may do several ops in case of a branch) void execI() { - psxRegs.code = fetch(psxRegs.pc); - - debugI(); - - if (Config.Debug) ProcessDebug(); - - psxRegs.pc += 4; - psxRegs.cycle += BIAS; - - psxBSC[psxRegs.code >> 26](); + execI_(psxMemRLUT, &psxRegs); } R3000Acpu psxInt = { diff --git a/libpcsxcore/psxinterpreter.h b/libpcsxcore/psxinterpreter.h index 89dd7ea1..87c7a324 100644 --- a/libpcsxcore/psxinterpreter.h +++ b/libpcsxcore/psxinterpreter.h @@ -1,7 +1,11 @@ - -extern u32 (*fetch)(u32 pc); +#ifndef __PSXINTERPRETER_H__ +#define __PSXINTERPRETER_H__ // called by "new_dynarec" void execI(); -void psxNULL(); void intApplyConfig(); +void MTC0(psxRegisters *regs_, int reg, u32 val); +void gteNULL(struct psxCP2Regs *regs); +void (*psxCP2[64])(struct psxCP2Regs *regs); + +#endif // __PSXINTERPRETER_H__ diff --git a/libpcsxcore/r3000a.c b/libpcsxcore/r3000a.c index a9b800e3..ba22d45f 100644 --- a/libpcsxcore/r3000a.c +++ b/libpcsxcore/r3000a.c @@ -83,15 +83,14 @@ void psxShutdown() { } void psxException(u32 code, u32 bd) { - psxRegs.code = fetch(psxRegs.pc); + psxRegs.code = PSXMu32(psxRegs.pc); if (!Config.HLE && ((((psxRegs.code) >> 24) & 0xfe) == 0x4a)) { // "hokuto no ken" / "Crash Bandicot 2" ... // BIOS does not allow to return to GTE instructions // (just skips it, supposedly because it's scheduled already) // so we execute it here - extern void (*psxCP2[64])(void *cp2regs); - psxCP2[psxRegs.code & 0x3f](&psxRegs.CP2D); + psxCP2[psxRegs.code & 0x3f](&psxRegs.CP2); } // Set the Cause diff --git a/libpcsxcore/r3000a.h b/libpcsxcore/r3000a.h index 49afcb1c..f99e03ba 100644 --- a/libpcsxcore/r3000a.h +++ b/libpcsxcore/r3000a.h @@ -219,85 +219,12 @@ void new_dyna_freeze(void *f, int mode); #define new_dyna_set_event(e, c) \ new_dyna_set_event_abs(e, psxRegs.cycle + (c)) -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - -#define _i32(x) *(s32 *)&x -#define _u32(x) x - -#define _i16(x) (((short *)&x)[1]) -#define _u16(x) (((unsigned short *)&x)[1]) - -#define _i8(x) (((char *)&x)[3]) -#define _u8(x) (((unsigned char *)&x)[3]) - -#else - -#define _i32(x) *(s32 *)&x -#define _u32(x) x - -#define _i16(x) *(short *)&x -#define _u16(x) *(unsigned short *)&x - -#define _i8(x) *(char *)&x -#define _u8(x) *(unsigned char *)&x - -#endif - -/**** R3000A Instruction Macros ****/ -#define _PC_ psxRegs.pc // The next PC to be executed - -#define _fOp_(code) ((code >> 26) ) // The opcode part of the instruction register -#define _fFunct_(code) ((code ) & 0x3F) // The funct part of the instruction register -#define _fRd_(code) ((code >> 11) & 0x1F) // The rd part of the instruction register -#define _fRt_(code) ((code >> 16) & 0x1F) // The rt part of the instruction register -#define _fRs_(code) ((code >> 21) & 0x1F) // The rs part of the instruction register -#define _fSa_(code) ((code >> 6) & 0x1F) // The sa part of the instruction register -#define _fIm_(code) ((u16)code) // The immediate part of the instruction register -#define _fTarget_(code) (code & 0x03ffffff) // The target part of the instruction register - -#define _fImm_(code) ((s16)code) // sign-extended immediate -#define _fImmU_(code) (code&0xffff) // zero-extended immediate - -#define _Op_ _fOp_(psxRegs.code) -#define _Funct_ _fFunct_(psxRegs.code) -#define _Rd_ _fRd_(psxRegs.code) -#define _Rt_ _fRt_(psxRegs.code) -#define _Rs_ _fRs_(psxRegs.code) -#define _Sa_ _fSa_(psxRegs.code) -#define _Im_ _fIm_(psxRegs.code) -#define _Target_ _fTarget_(psxRegs.code) - -#define _Imm_ _fImm_(psxRegs.code) -#define _ImmU_ _fImmU_(psxRegs.code) - -#define _rRs_ psxRegs.GPR.r[_Rs_] // Rs register -#define _rRt_ psxRegs.GPR.r[_Rt_] // Rt register -#define _rRd_ psxRegs.GPR.r[_Rd_] // Rd register -#define _rSa_ psxRegs.GPR.r[_Sa_] // Sa register -#define _rFs_ psxRegs.CP0.r[_Rd_] // Fs register - -#define _c2dRs_ psxRegs.CP2D.r[_Rs_] // Rs cop2 data register -#define _c2dRt_ psxRegs.CP2D.r[_Rt_] // Rt cop2 data register -#define _c2dRd_ psxRegs.CP2D.r[_Rd_] // Rd cop2 data register -#define _c2dSa_ psxRegs.CP2D.r[_Sa_] // Sa cop2 data register - -#define _rHi_ psxRegs.GPR.n.hi // The HI register -#define _rLo_ psxRegs.GPR.n.lo // The LO register - -#define _JumpTarget_ ((_Target_ * 4) + (_PC_ & 0xf0000000)) // Calculates the target during a jump instruction -#define _BranchTarget_ ((s16)_Im_ * 4 + _PC_) // Calculates the target during a branch instruction - -#define _SetLink(x) psxRegs.GPR.r[x] = _PC_ + 4; // Sets the return address in the link register - int psxInit(); void psxReset(); void psxShutdown(); void psxException(u32 code, u32 bd); void psxBranchTest(); void psxExecuteBios(); -int psxTestLoadDelay(int reg, u32 tmp); -void psxDelayTest(int reg, u32 bpc); -void psxTestSWInts(); void psxJumpTest(); #ifdef __cplusplus -- 2.39.2