1 /***************************************************************************
2 * Copyright (C) 2007 Ryan Schultz, PCSX-df Team, PCSX team *
4 * This program is free software; you can redistribute it and/or modify *
5 * it under the terms of the GNU General Public License as published by *
6 * the Free Software Foundation; either version 2 of the License, or *
7 * (at your option) any later version. *
9 * This program is distributed in the hope that it will be useful, *
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
12 * GNU General Public License for more details. *
14 * You should have received a copy of the GNU General Public License *
15 * along with this program; if not, write to the *
16 * Free Software Foundation, Inc., *
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. *
18 ***************************************************************************/
21 * PSX assembly interpreter.
24 #include "psxcommon.h"
28 #include "psxinterpreter.h"
31 #include "../include/compiler_features.h"
33 // these may cause issues: because of poor timing we may step
34 // on instructions that real hardware would never reach
35 #define DO_EXCEPTION_RESERVEDI
36 #define DO_EXCEPTION_ADDR_ERR
// Branch-state flags shared by the interpreter. doBranch() raises both to 1,
// and 'branch' is handed to psxException() as its second argument.
// File-scope statics start out zeroed, so no explicit initializer is needed.
static int branch;
static int branch2;
42 #define INT_ATTR __attribute__((regparm(2)))
47 #define INVALID_PTR NULL
// Forward declarations of the opcode dispatch tables (their initializers
// appear further down in this file). psxBSC is indexed by the primary
// opcode (code >> 26), psxSPC by the funct field of the instruction word.
static void (INT_ATTR *psxBSC[64])(psxRegisters *regs_, u32 code);
static void (INT_ATTR *psxSPC[64])(psxRegisters *regs_, u32 code);
54 // get an opcode without triggering exceptions or affecting cache
55 u32 intFakeFetch(u32 pc)
57 u8 *base = psxMemRLUT[pc >> 16];
59 if (unlikely(base == INVALID_PTR))
61 code = (u32 *)(base + (pc & 0xfffc));
66 static u32 INT_ATTR fetchNoCache(psxRegisters *regs, u8 **memRLUT, u32 pc)
68 u8 *base = memRLUT[pc >> 16];
70 if (unlikely(base == INVALID_PTR)) {
71 SysPrintf("game crash @%08x, ra=%08x\n", pc, regs->GPR.n.ra);
73 psxException(R3000E_IBE << 2, branch, ®s->CP0);
74 return 0; // execute as nop
76 code = (u32 *)(base + (pc & 0xfffc));
82 Use old CPU cache code when the RAM location is updated with new code (affects in-game racing)
84 static struct cache_entry {
89 static u32 INT_ATTR fetchICache(psxRegisters *regs, u8 **memRLUT, u32 pc)
94 // this is not how the hardware works but whatever
95 struct cache_entry *entry = &ICache[(pc & 0xff0) >> 4];
97 if (((entry->tag ^ pc) & 0xfffffff0) != 0 || pc < entry->tag)
99 const u8 *base = memRLUT[pc >> 16];
101 if (unlikely(base == INVALID_PTR)) {
102 SysPrintf("game crash @%08x, ra=%08x\n", pc, regs->GPR.n.ra);
104 psxException(R3000E_IBE << 2, branch, ®s->CP0);
105 return 0; // execute as nop
107 code = (u32 *)(base + (pc & 0xfff0));
110 // treat as 4 words, although other configurations are said to be possible
113 case 0x00: entry->data[0] = SWAP32(code[0]);
114 case 0x04: entry->data[1] = SWAP32(code[1]);
115 case 0x08: entry->data[2] = SWAP32(code[2]);
116 case 0x0c: entry->data[3] = SWAP32(code[3]);
119 return entry->data[(pc & 0x0f) >> 2];
122 return fetchNoCache(regs, memRLUT, pc);
// Active instruction-fetch routine. It starts out as the uncached fetcher;
// intApplyConfig() re-points it to fetchICache when icache emulation is
// enabled and the interpreter is the selected CPU core, and back to
// fetchNoCache otherwise.
static u32 (INT_ATTR *fetch)(psxRegisters *regs_, u8 **memRLUT, u32 pc) = fetchNoCache;
127 // Make the timing events trigger faster as we are currently assuming everything
128 // takes one cycle, which is not the case on real hardware.
129 // FIXME: count cache misses, memory latencies, stalls to get rid of this
130 static inline void addCycle(void)
132 assert(psxRegs.subCycleStep >= 0x10000);
133 psxRegs.subCycle += psxRegs.subCycleStep;
134 psxRegs.cycle += psxRegs.subCycle >> 16;
135 psxRegs.subCycle &= 0xffff;
138 static void delayRead(int reg, u32 bpc) {
141 // SysPrintf("delayRead at %x!\n", psxRegs.pc);
143 rold = psxRegs.GPR.r[reg];
144 psxBSC[psxRegs.code >> 26](&psxRegs, psxRegs.code); // branch delay load
145 rnew = psxRegs.GPR.r[reg];
151 psxRegs.GPR.r[reg] = rold;
152 execI(); // first branch opcode
153 psxRegs.GPR.r[reg] = rnew;
158 static void delayWrite(int reg, u32 bpc) {
160 /* SysPrintf("delayWrite at %x!\n", psxRegs.pc);
162 SysPrintf("%s\n", disR3000AF(psxRegs.code, psxRegs.pc-4));
163 SysPrintf("%s\n", disR3000AF(PSXMu32(bpc), bpc));*/
165 // no changes from normal behavior
167 psxBSC[psxRegs.code >> 26](&psxRegs, psxRegs.code);
175 static void delayReadWrite(int reg, u32 bpc) {
177 // SysPrintf("delayReadWrite at %x!\n", psxRegs.pc);
179 // the branch delay load is skipped
187 /**** R3000A Instruction Macros ****/
// Program counter of the CPU state 'regs_' that is in scope at the use site.
#define _PC_ regs_->pc // The next PC to be executed

// Field extractors for a 32-bit instruction word.
// The argument is parenthesized everywhere so that a compound expression
// (e.g. `a | b`) is decoded as a whole instead of being split up by operator
// precedence. _fOp_ applies no mask, so it relies on the argument being an
// unsigned 32-bit value.
#define _fOp_(code) ((code) >> 26) // The opcode part of the instruction register
#define _fFunct_(code) ((code) & 0x3F) // The funct part of the instruction register
#define _fRd_(code) (((code) >> 11) & 0x1F) // The rd part of the instruction register
#define _fRt_(code) (((code) >> 16) & 0x1F) // The rt part of the instruction register
#define _fRs_(code) (((code) >> 21) & 0x1F) // The rs part of the instruction register
#define _fSa_(code) (((code) >> 6) & 0x1F) // The sa part of the instruction register
#define _fIm_(code) ((u16)(code)) // The immediate part of the instruction register
#define _fTarget_(code) ((code) & 0x03ffffff) // The target part of the instruction register

#define _fImm_(code) ((s16)(code)) // sign-extended immediate
#define _fImmU_(code) ((code) & 0xffff) // zero-extended immediate
// Shorthands that decode the fields of the local variable 'code'
// (the instruction word passed to every opcode handler).
#define _Op_ _fOp_(code)
#define _Funct_ _fFunct_(code)
#define _Rd_ _fRd_(code)
#define _Rt_ _fRt_(code)
#define _Rs_ _fRs_(code)
#define _Sa_ _fSa_(code)
#define _Im_ _fIm_(code)
#define _Target_ _fTarget_(code)

#define _Imm_ _fImm_(code)
#define _ImmU_ _fImmU_(code)

// Register accessors; they expand to lvalues inside the 'regs_' CPU state,
// selected by the corresponding field of 'code'.
#define _rRs_ regs_->GPR.r[_Rs_] // Rs register
#define _rRt_ regs_->GPR.r[_Rt_] // Rt register
#define _rRd_ regs_->GPR.r[_Rd_] // Rd register
#define _rSa_ regs_->GPR.r[_Sa_] // Sa register
#define _rFs_ regs_->CP0.r[_Rd_] // Fs register (CP0 register selected by the rd field)

#define _rHi_ regs_->GPR.n.hi // The HI register
#define _rLo_ regs_->GPR.n.lo // The LO register

// Jump target: 26-bit target field times 4, combined with the top 4 bits of _PC_.
#define _JumpTarget_ ((_Target_ * 4) + (_PC_ & 0xf0000000)) // Calculates the target during a jump instruction
// Branch target: sign-extended 16-bit immediate times 4, added to _PC_.
#define _BranchTarget_ ((s16)_Im_ * 4 + _PC_) // Calculates the target during a branch instruction

// Stores _PC_ + 4 into GPR x. Note that the expansion already ends in ';'.
#define _SetLink(x) regs_->GPR.r[x] = _PC_ + 4; // Sets the return address in the link register
229 static inline INT_ATTR void name(psxRegisters *regs_, u32 code)
// These defines decode fields from a local variable named 'tmp'
// (as used by the function that follows) instead of from 'code' (_Funct_...).
#define _tFunct_ ((tmp ) & 0x3F) // The funct part of the instruction register
#define _tRd_ ((tmp >> 11) & 0x1F) // The rd part of the instruction register
#define _tRt_ ((tmp >> 16) & 0x1F) // The rt part of the instruction register
#define _tRs_ ((tmp >> 21) & 0x1F) // The rs part of the instruction register
#define _tSa_ ((tmp >> 6) & 0x1F) // The sa part of the instruction register
// Reinterpret a value as signed / unsigned 32-bit.
// The whole expansion is parenthesized so the cast cannot be re-associated
// by a neighbouring operator (e.g. `sizeof _i32(x)` or a postfix operator).
#define _i32(x) ((s32)(x))
#define _u32(x) ((u32)(x))
242 static int psxTestLoadDelay(int reg, u32 tmp) {
243 if (tmp == 0) return 0; // NOP
245 case 0x00: // SPECIAL
248 case 0x02: case 0x03: // SRL/SRA
249 if (_tRd_ == reg && _tRt_ == reg) return 1; else
250 if (_tRt_ == reg) return 2; else
251 if (_tRd_ == reg) return 3;
255 if (_tRs_ == reg) return 2;
258 if (_tRd_ == reg && _tRs_ == reg) return 1; else
259 if (_tRs_ == reg) return 2; else
260 if (_tRd_ == reg) return 3;
263 // SYSCALL/BREAK just a break;
265 case 0x20: case 0x21: case 0x22: case 0x23:
266 case 0x24: case 0x25: case 0x26: case 0x27:
267 case 0x2a: case 0x2b: // ADD/ADDU...
268 case 0x04: case 0x06: case 0x07: // SLLV...
269 if (_tRd_ == reg && (_tRt_ == reg || _tRs_ == reg)) return 1; else
270 if (_tRt_ == reg || _tRs_ == reg) return 2; else
271 if (_tRd_ == reg) return 3;
274 case 0x10: case 0x12: // MFHI/MFLO
275 if (_tRd_ == reg) return 3;
277 case 0x11: case 0x13: // MTHI/MTLO
278 if (_tRs_ == reg) return 2;
281 case 0x18: case 0x19:
282 case 0x1a: case 0x1b: // MULT/DIV...
283 if (_tRt_ == reg || _tRs_ == reg) return 2;
288 case 0x01: // REGIMM - BLTZ/BGEZ...
289 // Xenogears - lbu v0 / beq v0
290 // - no load delay (fixes battle loading)
293 // J would be just a break;
295 if (31 == reg) return 3;
298 case 0x06: case 0x07: // BLEZ/BGTZ
299 case 0x04: case 0x05: // BEQ/BNE
300 // Xenogears - lbu v0 / beq v0
301 // - no load delay (fixes battle loading)
304 case 0x08: case 0x09: case 0x0a: case 0x0b:
305 case 0x0c: case 0x0d: case 0x0e: // ADDI/ADDIU...
306 if (_tRt_ == reg && _tRs_ == reg) return 1; else
307 if (_tRs_ == reg) return 2; else
308 if (_tRt_ == reg) return 3;
312 if (_tRt_ == reg) return 3;
318 if (_tRt_ == reg) return 3;
321 if (_tRt_ == reg) return 3;
324 if (_tRt_ == reg) return 2;
327 if (_tRt_ == reg) return 2;
338 if (_tRt_ == reg) return 3;
341 if (_tRt_ == reg) return 3;
344 if (_tRt_ == reg) return 2;
347 if (_tRt_ == reg) return 2;
355 case 0x22: case 0x26: // LWL/LWR
356 if (_tRt_ == reg) return 3; else
357 if (_tRs_ == reg) return 2;
360 case 0x20: case 0x21: case 0x23:
361 case 0x24: case 0x25: // LB/LH/LW/LBU/LHU
362 if (_tRt_ == reg && _tRs_ == reg) return 1; else
363 if (_tRs_ == reg) return 2; else
364 if (_tRt_ == reg) return 3;
367 case 0x28: case 0x29: case 0x2a:
368 case 0x2b: case 0x2e: // SB/SH/SWL/SW/SWR
369 if (_tRt_ == reg || _tRs_ == reg) return 2;
372 case 0x32: case 0x3a: // LWC2/SWC2
373 if (_tRs_ == reg) return 2;
380 static void psxDelayTest(int reg, u32 bpc) {
381 u32 tmp = intFakeFetch(bpc);
384 switch (psxTestLoadDelay(reg, tmp)) {
386 delayReadWrite(reg, bpc); return;
388 delayRead(reg, bpc); return;
390 delayWrite(reg, bpc); return;
393 psxBSC[psxRegs.code >> 26](&psxRegs, psxRegs.code);
// Nonzero when instruction word c_ is a branch or jump:
//  - primary opcode 1..7 (REGIMM, J, JAL, BEQ, BNE, BLEZ, BGTZ in psxBSC), or
//  - primary opcode 0 (SPECIAL) with funct 8 or 9 (JR, JALR in psxSPC).
// c_ is evaluated more than once, so it must be free of side effects.
#define isBranch(c_) \
((1 <= ((c_) >> 26) && ((c_) >> 26) <= 7) || ((c_) & 0xfc00003e) == 8)
// Exchanges two u32 lvalues through a temporary. Expands to a bare { } block.
#define swap_(a_, b_) { u32 t_ = a_; a_ = b_; b_ = t_; }
405 // tar1 is main branch target, 'code' is opcode in DS
406 static u32 psxBranchNoDelay(psxRegisters *regs_, u32 tar1, u32 code, int *taken) {
409 assert(isBranch(code));
411 switch (code >> 26) {
412 case 0x00: // SPECIAL
419 regs_->GPR.r[_Rd_] = tar1 + 4;
427 regs_->GPR.n.ra = tar1 + 4;
429 return tar1 + (s16)_Im_ * 4;
432 regs_->GPR.n.ra = tar1 + 4;
433 if (_i32(_rRs_) >= 0)
434 return tar1 + (s16)_Im_ * 4;
437 if (rt & 1) { // BGEZ
438 if (_i32(_rRs_) >= 0)
439 return tar1 + (s16)_Im_ * 4;
443 return tar1 + (s16)_Im_ * 4;
449 return (tar1 & 0xf0000000u) + _Target_ * 4;
451 regs_->GPR.n.ra = tar1 + 4;
452 return (tar1 & 0xf0000000u) + _Target_ * 4;
454 if (_i32(_rRs_) == _i32(_rRt_))
455 return tar1 + (s16)_Im_ * 4;
458 if (_i32(_rRs_) != _i32(_rRt_))
459 return tar1 + (s16)_Im_ * 4;
462 if (_i32(_rRs_) <= 0)
463 return tar1 + (s16)_Im_ * 4;
467 return tar1 + (s16)_Im_ * 4;
475 static void psxDoDelayBranch(psxRegisters *regs, u32 tar1, u32 code1) {
479 tar2 = psxBranchNoDelay(regs, tar1, code1, &taken);
485 * taken branch in delay slot:
486 * - execute 1 instruction at tar1
487 * - jump to tar2 (target of branch in delay slot; this branch
488 * has no normal delay slot, instruction at tar1 was fetched instead)
490 for (lim = 0; lim < 8; lim++) {
491 regs->code = code = fetch(regs, psxMemRLUT, tar1);
493 if (likely(!isBranch(code))) {
494 psxBSC[code >> 26](regs, code);
498 tar1 = psxBranchNoDelay(regs, tar2, code, &taken);
504 SysPrintf("Evil chained DS branches @ %08x %08x %08x\n", regs->pc, tar1, tar2);
507 static void doBranch(psxRegisters *regs, u32 tar) {
510 branch2 = branch = 1;
512 // fetch the delay slot
515 regs->code = code = fetch(regs, psxMemRLUT, pc);
519 // check for branch in delay slot
520 if (unlikely(isBranch(code))) {
521 psxDoDelayBranch(regs, tar, code);
522 log_unhandled("branch in DS: %08x->%08x\n", pc, regs->pc);
528 // check for load delay
535 psxDelayTest(_Rt_, tar);
545 psxDelayTest(_Rt_, tar);
552 psxDelayTest(_Rt_, tar);
555 if (tmp >= 0x20 && tmp <= 0x26) { // LB/LH/LWL/LW/LBU/LHU/LWR
556 psxDelayTest(_Rt_, tar);
562 psxBSC[code >> 26](regs, code);
570 static void doBranchReg(psxRegisters *regs, u32 tar) {
571 #ifdef DO_EXCEPTION_ADDR_ERR
572 if (unlikely(tar & 3)) {
573 psxRegs.pc = psxRegs.CP0.n.BadVAddr = tar;
574 psxException(R3000E_AdEL << 2, branch, &psxRegs.CP0);
583 #if __has_builtin(__builtin_add_overflow) || (defined(__GNUC__) && __GNUC__ >= 5)
// Builtin-based variant: the compiler builtin stores the wrapped result into r
// and evaluates to true when the exact result does not fit r's type.
#define add_overflow(a, b, r) __builtin_add_overflow(a, b, &(r))
#define sub_overflow(a, b, r) __builtin_sub_overflow(a, b, &(r))
// Portable fallback (GNU statement expression): computes the wrapped 32-bit
// result, stores it in r and evaluates to nonzero on signed overflow.
// a and b are captured once in locals, so argument expressions such as
// `y + 1` are handled correctly and are not evaluated repeatedly.
//  add: overflow iff both operands share a sign that the result does not.
//  sub: overflow iff the operands differ in sign and the result's sign
//       differs from a's.
#define add_overflow(a, b, r) ({ \
	u32 ovf_a_ = (u32)(a), ovf_b_ = (u32)(b); \
	u32 ovf_r_ = ovf_a_ + ovf_b_; \
	(r) = ovf_r_; \
	(ovf_a_ ^ ~ovf_b_) & (ovf_a_ ^ ovf_r_) & (1u << 31); \
})
#define sub_overflow(a, b, r) ({ \
	u32 ovf_a_ = (u32)(a), ovf_b_ = (u32)(b); \
	u32 ovf_r_ = ovf_a_ - ovf_b_; \
	(r) = ovf_r_; \
	(ovf_a_ ^ ovf_b_) & (ovf_a_ ^ ovf_r_) & (1u << 31); \
})
591 static void addExc(psxRegisters *regs, u32 rt, s32 a1, s32 a2) {
593 if (add_overflow(a1, a2, r)) {
594 //printf("ov %08x + %08x = %08x\n", a1, a2, r);
596 psxException(R3000E_Ov << 2, branch, ®s->CP0);
603 static void subExc(psxRegisters *regs, u32 rt, s32 a1, s32 a2) {
605 if (sub_overflow(a1, a2, r)) {
607 psxException(R3000E_Ov << 2, branch, ®s->CP0);
614 /*********************************************************
615 * Arithmetic with immediate operand *
616 * Format: OP rt, rs, immediate *
617 *********************************************************/
618 OP(psxADDI) { addExc(regs_, _Rt_, _i32(_rRs_), _Imm_); } // Rt = Rs + Im (Exception on Integer Overflow)
619 OP(psxADDIU) { if (!_Rt_) return; _rRt_ = _u32(_rRs_) + _Imm_ ; } // Rt = Rs + Im
620 OP(psxANDI) { if (!_Rt_) return; _rRt_ = _u32(_rRs_) & _ImmU_; } // Rt = Rs And Im
621 OP(psxORI) { if (!_Rt_) return; _rRt_ = _u32(_rRs_) | _ImmU_; } // Rt = Rs Or Im
622 OP(psxXORI) { if (!_Rt_) return; _rRt_ = _u32(_rRs_) ^ _ImmU_; } // Rt = Rs Xor Im
623 OP(psxSLTI) { if (!_Rt_) return; _rRt_ = _i32(_rRs_) < _Imm_ ; } // Rt = Rs < Im (Signed)
624 OP(psxSLTIU) { if (!_Rt_) return; _rRt_ = _u32(_rRs_) < ((u32)_Imm_); } // Rt = Rs < Im (Unsigned)
626 /*********************************************************
627 * Register arithmetic *
628 * Format: OP rd, rs, rt *
629 *********************************************************/
630 OP(psxADD) { addExc(regs_, _Rd_, _i32(_rRs_), _i32(_rRt_)); } // Rd = Rs + Rt (Exception on Integer Overflow)
631 OP(psxSUB) { subExc(regs_, _Rd_, _i32(_rRs_), _i32(_rRt_)); } // Rd = Rs - Rt (Exception on Integer Overflow)
632 OP(psxADDU) { if (!_Rd_) return; _rRd_ = _u32(_rRs_) + _u32(_rRt_); } // Rd = Rs + Rt
633 OP(psxSUBU) { if (!_Rd_) return; _rRd_ = _u32(_rRs_) - _u32(_rRt_); } // Rd = Rs - Rt
634 OP(psxAND) { if (!_Rd_) return; _rRd_ = _u32(_rRs_) & _u32(_rRt_); } // Rd = Rs And Rt
635 OP(psxOR) { if (!_Rd_) return; _rRd_ = _u32(_rRs_) | _u32(_rRt_); } // Rd = Rs Or Rt
636 OP(psxXOR) { if (!_Rd_) return; _rRd_ = _u32(_rRs_) ^ _u32(_rRt_); } // Rd = Rs Xor Rt
637 OP(psxNOR) { if (!_Rd_) return; _rRd_ =~(_u32(_rRs_) | _u32(_rRt_)); }// Rd = Rs Nor Rt
638 OP(psxSLT) { if (!_Rd_) return; _rRd_ = _i32(_rRs_) < _i32(_rRt_); } // Rd = Rs < Rt (Signed)
639 OP(psxSLTU) { if (!_Rd_) return; _rRd_ = _u32(_rRs_) < _u32(_rRt_); } // Rd = Rs < Rt (Unsigned)
641 /*********************************************************
642 * Register mult/div & Register trap logic *
643 * Format: OP rs, rt *
644 *********************************************************/
648 if (_rRs_ & 0x80000000) {
654 #if !defined(__arm__) && !defined(__aarch64__)
655 else if (_rRs_ == 0x80000000 && _rRt_ == 0xFFFFFFFF) {
661 _rLo_ = _i32(_rRs_) / _i32(_rRt_);
662 _rHi_ = _i32(_rRs_) % _i32(_rRt_);
667 regs_->muldivBusyCycle = regs_->cycle + 37;
673 _rLo_ = _rRs_ / _rRt_;
674 _rHi_ = _rRs_ % _rRt_;
683 regs_->muldivBusyCycle = regs_->cycle + 37;
684 psxDIVU(regs_, code);
688 u64 res = (s64)_i32(_rRs_) * _i32(_rRt_);
690 regs_->GPR.n.lo = (u32)res;
691 regs_->GPR.n.hi = (u32)(res >> 32);
695 // approximate, but maybe good enough
697 u32 lz = __builtin_clz(((rs ^ ((s32)rs >> 21)) | 1));
698 u32 c = 7 + (2 - (lz / 11)) * 4;
699 regs_->muldivBusyCycle = regs_->cycle + c;
700 psxMULT(regs_, code);
704 u64 res = (u64)_u32(_rRs_) * _u32(_rRt_);
706 regs_->GPR.n.lo = (u32)(res & 0xffffffff);
707 regs_->GPR.n.hi = (u32)((res >> 32) & 0xffffffff);
711 // approximate, but maybe good enough
712 u32 lz = __builtin_clz(_rRs_ | 1);
713 u32 c = 7 + (2 - (lz / 11)) * 4;
714 regs_->muldivBusyCycle = regs_->cycle + c;
715 psxMULTU(regs_, code);
718 /*********************************************************
719 * Register branch logic *
720 * Format: OP rs, offset *
721 *********************************************************/
// Compare-with-zero branch body: when (s32)Rs 'op' 0 holds, calls doBranch()
// with _BranchTarget_. 'op' is a relational operator token (>=, >, <=, <).
// Expands to a bare if-statement that already ends in ';'.
#define RepZBranchi32(op) \
if(_i32(_rRs_) op 0) \
doBranch(regs_, _BranchTarget_);
725 #define RepZBranchLinki32(op) { \
726 s32 temp = _i32(_rRs_); \
729 doBranch(regs_, _BranchTarget_); \
732 OP(psxBGEZ) { RepZBranchi32(>=) } // Branch if Rs >= 0
733 OP(psxBGEZAL) { RepZBranchLinki32(>=) } // Branch if Rs >= 0 and link
734 OP(psxBGTZ) { RepZBranchi32(>) } // Branch if Rs > 0
735 OP(psxBLEZ) { RepZBranchi32(<=) } // Branch if Rs <= 0
736 OP(psxBLTZ) { RepZBranchi32(<) } // Branch if Rs < 0
737 OP(psxBLTZAL) { RepZBranchLinki32(<) } // Branch if Rs < 0 and link
739 /*********************************************************
740 * Shift arithmetic with constant shift *
741 * Format: OP rd, rt, sa *
742 *********************************************************/
743 OP(psxSLL) { if (!_Rd_) return; _rRd_ = _u32(_rRt_) << _Sa_; } // Rd = Rt << sa
744 OP(psxSRA) { if (!_Rd_) return; _rRd_ = _i32(_rRt_) >> _Sa_; } // Rd = Rt >> sa (arithmetic)
745 OP(psxSRL) { if (!_Rd_) return; _rRd_ = _u32(_rRt_) >> _Sa_; } // Rd = Rt >> sa (logical)
747 /*********************************************************
748 * Shift arithmetic with variant register shift *
749 * Format: OP rd, rt, rs *
750 *********************************************************/
751 OP(psxSLLV) { if (!_Rd_) return; _rRd_ = _u32(_rRt_) << (_u32(_rRs_) & 0x1F); } // Rd = Rt << rs
752 OP(psxSRAV) { if (!_Rd_) return; _rRd_ = _i32(_rRt_) >> (_u32(_rRs_) & 0x1F); } // Rd = Rt >> rs (arithmetic)
753 OP(psxSRLV) { if (!_Rd_) return; _rRd_ = _u32(_rRt_) >> (_u32(_rRs_) & 0x1F); } // Rd = Rt >> rs (logical)
755 /*********************************************************
756 * Load higher 16 bits of the first word in GPR with imm *
757 * Format: OP rt, immediate *
758 *********************************************************/
759 OP(psxLUI) { if (!_Rt_) return; _rRt_ = code << 16; } // Upper halfword of Rt = Im
761 /*********************************************************
762 * Move from HI/LO to GPR *
764 *********************************************************/
765 OP(psxMFHI) { if (!_Rd_) return; _rRd_ = _rHi_; } // Rd = Hi
766 OP(psxMFLO) { if (!_Rd_) return; _rRd_ = _rLo_; } // Rd = Lo
768 static void mflohiCheckStall(psxRegisters *regs_)
770 u32 left = regs_->muldivBusyCycle - regs_->cycle;
772 //printf("muldiv stall %u\n", left);
773 regs_->cycle = regs_->muldivBusyCycle;
777 OP(psxMFHI_stall) { mflohiCheckStall(regs_); psxMFHI(regs_, code); }
778 OP(psxMFLO_stall) { mflohiCheckStall(regs_); psxMFLO(regs_, code); }
780 /*********************************************************
 * Move from GPR to HI/LO & Register jump *
783 *********************************************************/
784 OP(psxMTHI) { _rHi_ = _rRs_; } // Hi = Rs
785 OP(psxMTLO) { _rLo_ = _rRs_; } // Lo = Rs
787 /*********************************************************
788 * Special purpose instructions *
790 *********************************************************/
793 psxException(R3000E_Bp << 2, branch, ®s_->CP0);
798 psxException(R3000E_Syscall << 2, branch, ®s_->CP0);
// Forward declaration: psxTestSWInts() below calls execI_() ahead of its
// definition later in this file.
static inline void execI_(u8 **memRLUT, psxRegisters *regs_);
803 static inline void psxTestSWInts(psxRegisters *regs_, int step) {
804 if (regs_->CP0.n.Cause & regs_->CP0.n.Status & 0x0300 &&
805 regs_->CP0.n.Status & 0x1) {
807 execI_(psxMemRLUT, regs_);
808 regs_->CP0.n.Cause &= ~0x7c;
809 psxException(regs_->CP0.n.Cause, branch, ®s_->CP0);
814 // SysPrintf("psxRFE\n");
815 regs_->CP0.n.Status = (regs_->CP0.n.Status & 0xfffffff0) |
816 ((regs_->CP0.n.Status & 0x3c) >> 2);
817 psxTestSWInts(regs_, 0);
820 /*********************************************************
821 * Register branch logic *
822 * Format: OP rs, rt, offset *
823 *********************************************************/
824 #define RepBranchi32(op) { \
825 if (_i32(_rRs_) op _i32(_rRt_)) \
826 doBranch(regs_, _BranchTarget_); \
829 OP(psxBEQ) { RepBranchi32(==) } // Branch if Rs == Rt
830 OP(psxBNE) { RepBranchi32(!=) } // Branch if Rs != Rt
832 /*********************************************************
834 * Format: OP target *
835 *********************************************************/
836 OP(psxJ) { doBranch(regs_, _JumpTarget_); }
837 OP(psxJAL) { _SetLink(31); doBranch(regs_, _JumpTarget_); }
839 /*********************************************************
841 * Format: OP rs, rd *
842 *********************************************************/
844 doBranchReg(regs_, _rRs_);
849 u32 temp = _u32(_rRs_);
850 if (_Rd_) { _SetLink(_Rd_); }
851 doBranchReg(regs_, temp);
854 /*********************************************************
855 * Load and store for GPR *
856 * Format: OP rt, offset(base) *
857 *********************************************************/
// Effective address of a load/store: base register Rs plus the
// sign-extended 16-bit immediate.
#define _oB_ (regs_->GPR.r[_Rs_] + _Imm_)
861 OP(psxLB) { u32 v = (s8)psxMemRead8(_oB_); if (_Rt_) _rRt_ = v; }
862 OP(psxLBU) { u32 v = psxMemRead8(_oB_); if (_Rt_) _rRt_ = v; }
863 OP(psxLH) { u32 v = (s16)psxMemRead16(_oB_); if (_Rt_) _rRt_ = v; }
864 OP(psxLHU) { u32 v = psxMemRead16(_oB_); if (_Rt_) _rRt_ = v; }
865 OP(psxLW) { u32 v = psxMemRead32(_oB_); if (_Rt_) _rRt_ = v; }
868 static const u32 LWL_MASK[4] = { 0xffffff, 0xffff, 0xff, 0 };
869 static const u32 LWL_SHIFT[4] = { 24, 16, 8, 0 };
871 u32 shift = addr & 3;
872 u32 mem = psxMemRead32(addr & ~3);
875 _rRt_ = (_u32(_rRt_) & LWL_MASK[shift]) | (mem << LWL_SHIFT[shift]);
878 Mem = 1234. Reg = abcd
880 0 4bcd (mem << 24) | (reg & 0x00ffffff)
881 1 34cd (mem << 16) | (reg & 0x0000ffff)
882 2 234d (mem << 8) | (reg & 0x000000ff)
883 3 1234 (mem ) | (reg & 0x00000000)
888 static const u32 LWR_MASK[4] = { 0, 0xff000000, 0xffff0000, 0xffffff00 };
889 static const u32 LWR_SHIFT[4] = { 0, 8, 16, 24 };
891 u32 shift = addr & 3;
892 u32 mem = psxMemRead32(addr & ~3);
895 _rRt_ = (_u32(_rRt_) & LWR_MASK[shift]) | (mem >> LWR_SHIFT[shift]);
898 Mem = 1234. Reg = abcd
900 0 1234 (mem ) | (reg & 0x00000000)
901 1 a123 (mem >> 8) | (reg & 0xff000000)
902 2 ab12 (mem >> 16) | (reg & 0xffff0000)
903 3 abc1 (mem >> 24) | (reg & 0xffffff00)
907 OP(psxSB) { psxMemWrite8 (_oB_, _rRt_ & 0xff); }
908 OP(psxSH) { psxMemWrite16(_oB_, _rRt_ & 0xffff); }
909 OP(psxSW) { psxMemWrite32(_oB_, _rRt_); }
912 static const u32 SWL_MASK[4] = { 0xffffff00, 0xffff0000, 0xff000000, 0 };
913 static const u32 SWL_SHIFT[4] = { 24, 16, 8, 0 };
915 u32 shift = addr & 3;
916 u32 mem = psxMemRead32(addr & ~3);
918 psxMemWrite32(addr & ~3, (_u32(_rRt_) >> SWL_SHIFT[shift]) |
919 ( mem & SWL_MASK[shift]) );
921 Mem = 1234. Reg = abcd
923 0 123a (reg >> 24) | (mem & 0xffffff00)
924 1 12ab (reg >> 16) | (mem & 0xffff0000)
925 2 1abc (reg >> 8) | (mem & 0xff000000)
926 3 abcd (reg ) | (mem & 0x00000000)
931 static const u32 SWR_MASK[4] = { 0, 0xff, 0xffff, 0xffffff };
932 static const u32 SWR_SHIFT[4] = { 0, 8, 16, 24 };
934 u32 shift = addr & 3;
935 u32 mem = psxMemRead32(addr & ~3);
937 psxMemWrite32(addr & ~3, (_u32(_rRt_) << SWR_SHIFT[shift]) |
938 ( mem & SWR_MASK[shift]) );
941 Mem = 1234. Reg = abcd
943 0 abcd (reg ) | (mem & 0x00000000)
944 1 bcd4 (reg << 8) | (mem & 0x000000ff)
945 2 cd34 (reg << 16) | (mem & 0x0000ffff)
946 3 d234 (reg << 24) | (mem & 0x00ffffff)
950 /*********************************************************
951 * Moves between GPR and COPx *
952 * Format: OP rt, fs *
953 *********************************************************/
956 #ifdef DO_EXCEPTION_RESERVEDI
957 if (unlikely(r == 0)) {
959 psxException(R3000E_RI << 2, branch, ®s_->CP0);
963 _rRt_ = regs_->CP0.r[r];
966 OP(psxCFC0) { if (!_Rt_) return; _rRt_ = _rFs_; }
// Forward declaration: setupCop() is defined near the end of this file but
// is called by code that appears before that definition (e.g. intNotify()).
static void setupCop(u32 sr);
970 void MTC0(psxRegisters *regs_, int reg, u32 val) {
971 // SysPrintf("MTC0 %d: %x\n", reg, val);
974 if (unlikely((regs_->CP0.n.Status ^ val) & (1 << 16)))
975 psxMemOnIsolate((val >> 16) & 1);
976 if (unlikely((regs_->CP0.n.Status ^ val) & (7 << 29)))
978 regs_->CP0.n.Status = val;
979 psxTestSWInts(regs_, 1);
983 regs_->CP0.n.Cause &= ~0x0300;
984 regs_->CP0.n.Cause |= val & 0x0300;
985 psxTestSWInts(regs_, 0);
989 regs_->CP0.r[reg] = val;
994 OP(psxMTC0) { MTC0(regs_, _Rd_, _u32(_rRt_)); }
995 OP(psxCTC0) { MTC0(regs_, _Rd_, _u32(_rRt_)); }
997 /*********************************************************
998 * Unknown instruction (would generate an exception) *
1000 *********************************************************/
1001 static inline void psxNULL_(void) {
1002 //printf("op %08x @%08x\n", psxRegs.code, psxRegs.pc);
1007 #ifdef DO_EXCEPTION_RESERVEDI
1009 psxException(R3000E_RI << 2, branch, ®s_->CP0);
1013 void gteNULL(struct psxCP2Regs *regs) {
1018 psxSPC[_Funct_](regs_, code);
1023 case 0x00: psxMFC0(regs_, code); break;
1024 case 0x02: psxCFC0(regs_, code); break;
1025 case 0x04: psxMTC0(regs_, code); break;
1026 case 0x06: psxCTC0(regs_, code); break;
1027 case 0x10: psxRFE(regs_, code); break;
1028 default: psxNULL_(); break;
1033 // MTC0(regs_, _Rt_, psxMemRead32(_oB_)); // ?
1034 log_unhandled("LWC0 %08x\n", code);
1038 // ??? what actually happens here?
1042 #ifdef DO_EXCEPTION_RESERVEDI
1044 psxException((1<<28) | (R3000E_RI << 2), branch, ®s_->CP0);
1049 psxCP2[_Funct_](®s_->CP2);
1055 psxCP2[f](®s_->CP2);
1059 #ifdef DO_EXCEPTION_RESERVEDI
1061 psxException((2<<28) | (R3000E_RI << 2), branch, ®s_->CP0);
1067 regs_->GPR.r[_Rt_] = MFC2(®s_->CP2, _Rd_);
1072 regs_->GPR.r[_Rt_] = regs_->CP2C.r[_Rd_];
1076 MTC2(®s_->CP2, regs_->GPR.r[_Rt_], _Rd_);
1080 CTC2(®s_->CP2, regs_->GPR.r[_Rt_], _Rd_);
1084 MTC2(®s_->CP2, psxMemRead32(_oB_), _Rt_);
1088 psxMemWrite32(_oB_, MFC2(®s_->CP2, _Rt_));
1093 gteLWC2(regs_, code);
1098 gteSWC2(regs_, code);
1102 // ??? what actually happens here?
1106 #ifdef DO_EXCEPTION_RESERVEDI
1108 psxException((3<<28) | (R3000E_RI << 2), branch, ®s_->CP0);
1113 // does this read memory?
1114 log_unhandled("LWCx %08x\n", code);
1118 // does this write something to memory?
1119 log_unhandled("SWCx %08x\n", code);
1122 static void psxBASIC(struct psxCP2Regs *cp2regs) {
1123 psxRegisters *regs_ = (void *)((char *)cp2regs - offsetof(psxRegisters, CP2));
1124 u32 code = regs_->code;
1125 assert(regs_ == &psxRegs);
1127 case 0x00: gteMFC2(regs_, code); break;
1128 case 0x02: gteCFC2(regs_, code); break;
1129 case 0x04: gteMTC2(regs_, code); break;
1130 case 0x06: gteCTC2(regs_, code); break;
1131 default: psxNULL_(); break;
1138 case 0x10: psxBLTZAL(regs_, code); break;
1139 case 0x11: psxBGEZAL(regs_, code); break;
1142 psxBGEZ(regs_, code);
1144 psxBLTZ(regs_, code);
1150 if (unlikely(!Config.HLE)) {
1151 psxSWCx(regs_, code);
1154 hleCode = code & 0x03ffffff;
1155 if (hleCode >= (sizeof(psxHLEt) / sizeof(psxHLEt[0]))) {
1156 psxSWCx(regs_, code);
1162 static void (INT_ATTR *psxBSC[64])(psxRegisters *regs_, u32 code) = {
1163 psxSPECIAL, psxREGIMM, psxJ , psxJAL , psxBEQ , psxBNE , psxBLEZ, psxBGTZ,
1164 psxADDI , psxADDIU , psxSLTI, psxSLTIU, psxANDI, psxORI , psxXORI, psxLUI ,
1165 psxCOP0 , psxCOP1d , psxCOP2, psxCOP3d, psxNULL, psxCOP1d,psxCOP2d,psxCOP3d,
1166 psxNULL , psxCOP1d , psxCOP2d,psxCOP3d, psxNULL, psxCOP1d,psxCOP2d,psxCOP3d,
1167 psxLB , psxLH , psxLWL , psxLW , psxLBU , psxLHU , psxLWR , psxCOP3d,
1168 psxSB , psxSH , psxSWL , psxSW , psxNULL, psxCOP1d,psxSWR , psxCOP3d,
1169 psxLWC0 , psxLWCx , gteLWC2, psxLWCx , psxNULL, psxCOP1d,psxCOP2d,psxCOP3d,
1170 psxSWCx , psxSWCx , gteSWC2, psxHLE , psxNULL, psxCOP1d,psxCOP2d,psxCOP3d,
1173 static void (INT_ATTR *psxSPC[64])(psxRegisters *regs_, u32 code) = {
1174 psxSLL , psxNULL , psxSRL , psxSRA , psxSLLV , psxNULL , psxSRLV, psxSRAV,
1175 psxJR , psxJALR , psxNULL, psxNULL, psxSYSCALL, psxBREAK, psxNULL, psxNULL,
1176 psxMFHI, psxMTHI , psxMFLO, psxMTLO, psxNULL , psxNULL , psxNULL, psxNULL,
1177 psxMULT, psxMULTU, psxDIV , psxDIVU, psxNULL , psxNULL , psxNULL, psxNULL,
1178 psxADD , psxADDU , psxSUB , psxSUBU, psxAND , psxOR , psxXOR , psxNOR ,
1179 psxNULL, psxNULL , psxSLT , psxSLTU, psxNULL , psxNULL , psxNULL, psxNULL,
1180 psxNULL, psxNULL , psxNULL, psxNULL, psxNULL , psxNULL , psxNULL, psxNULL,
1181 psxNULL, psxNULL , psxNULL, psxNULL, psxNULL , psxNULL , psxNULL, psxNULL
1184 void (*psxCP2[64])(struct psxCP2Regs *regs) = {
1185 psxBASIC, gteRTPS , gteNULL , gteNULL, gteNULL, gteNULL , gteNCLIP, gteNULL, // 00
1186 gteNULL , gteNULL , gteNULL , gteNULL, gteOP , gteNULL , gteNULL , gteNULL, // 08
1187 gteDPCS , gteINTPL, gteMVMVA, gteNCDS, gteCDP , gteNULL , gteNCDT , gteNULL, // 10
1188 gteNULL , gteNULL , gteNULL , gteNCCS, gteCC , gteNULL , gteNCS , gteNULL, // 18
1189 gteNCT , gteNULL , gteNULL , gteNULL, gteNULL, gteNULL , gteNULL , gteNULL, // 20
1190 gteSQR , gteDCPL , gteDPCT , gteNULL, gteNULL, gteAVSZ3, gteAVSZ4, gteNULL, // 28
1191 gteRTPT , gteNULL , gteNULL , gteNULL, gteNULL, gteNULL , gteNULL , gteNULL, // 30
1192 gteNULL , gteNULL , gteNULL , gteNULL, gteNULL, gteGPF , gteGPL , gteNCCT // 38
1195 ///////////////////////////////////////////
1197 static int intInit() {
1201 static void intReset() {
1204 static inline void execI_(u8 **memRLUT, psxRegisters *regs_) {
1207 regs_->code = fetch(regs_, memRLUT, pc);
1211 psxBSC[regs_->code >> 26](regs_, regs_->code);
1214 static void intExecute() {
1215 psxRegisters *regs_ = &psxRegs;
1216 u8 **memRLUT = psxMemRLUT;
1220 execI_(memRLUT, regs_);
1223 void intExecuteBlock(enum blockExecCaller caller) {
1224 psxRegisters *regs_ = &psxRegs;
1225 u8 **memRLUT = psxMemRLUT;
1229 execI_(memRLUT, regs_);
1232 static void intClear(u32 Addr, u32 Size) {
1235 static void intNotify(enum R3000Anote note, void *data) {
1237 case R3000ACPU_NOTIFY_AFTER_LOAD:
1238 setupCop(psxRegs.CP0.n.Status);
1240 case R3000ACPU_NOTIFY_CACHE_ISOLATED: // Armored Core?
1241 memset(&ICache, 0xff, sizeof(ICache));
1243 case R3000ACPU_NOTIFY_CACHE_UNISOLATED:
1244 case R3000ACPU_NOTIFY_BEFORE_SAVE:
1249 static void setupCop(u32 sr)
1251 if (sr & (1u << 29))
1252 psxBSC[17] = psxCOP1;
1254 psxBSC[17] = psxCOP1d;
1255 if (sr & (1u << 30))
1256 psxBSC[18] = Config.DisableStalls ? psxCOP2 : psxCOP2_stall;
1258 psxBSC[18] = psxCOP2d;
1259 if (sr & (1u << 31))
1260 psxBSC[19] = psxCOP3;
1262 psxBSC[19] = psxCOP3d;
1265 void intApplyConfig() {
1268 assert(psxBSC[50] == gteLWC2 || psxBSC[50] == gteLWC2_stall);
1269 assert(psxBSC[58] == gteSWC2 || psxBSC[58] == gteSWC2_stall);
1270 assert(psxSPC[16] == psxMFHI || psxSPC[16] == psxMFHI_stall);
1271 assert(psxSPC[18] == psxMFLO || psxSPC[18] == psxMFLO_stall);
1272 assert(psxSPC[24] == psxMULT || psxSPC[24] == psxMULT_stall);
1273 assert(psxSPC[25] == psxMULTU || psxSPC[25] == psxMULTU_stall);
1274 assert(psxSPC[26] == psxDIV || psxSPC[26] == psxDIV_stall);
1275 assert(psxSPC[27] == psxDIVU || psxSPC[27] == psxDIVU_stall);
1277 if (Config.DisableStalls) {
1278 psxBSC[18] = psxCOP2;
1279 psxBSC[50] = gteLWC2;
1280 psxBSC[58] = gteSWC2;
1281 psxSPC[16] = psxMFHI;
1282 psxSPC[18] = psxMFLO;
1283 psxSPC[24] = psxMULT;
1284 psxSPC[25] = psxMULTU;
1285 psxSPC[26] = psxDIV;
1286 psxSPC[27] = psxDIVU;
1288 psxBSC[18] = psxCOP2_stall;
1289 psxBSC[50] = gteLWC2_stall;
1290 psxBSC[58] = gteSWC2_stall;
1291 psxSPC[16] = psxMFHI_stall;
1292 psxSPC[18] = psxMFLO_stall;
1293 psxSPC[24] = psxMULT_stall;
1294 psxSPC[25] = psxMULTU_stall;
1295 psxSPC[26] = psxDIV_stall;
1296 psxSPC[27] = psxDIVU_stall;
1298 setupCop(psxRegs.CP0.n.Status);
1300 // dynarec may occasionally call the interpreter, in such a case the
1301 // cache won't work (cache only works right if all fetches go through it)
1302 if (!Config.icache_emulation || psxCpu != &psxInt)
1303 fetch = fetchNoCache;
1305 fetch = fetchICache;
1307 cycle_mult = Config.cycle_multiplier_override && Config.cycle_multiplier == CYCLE_MULT_DEFAULT
1308 ? Config.cycle_multiplier_override : Config.cycle_multiplier;
1309 psxRegs.subCycleStep = 0x10000 * cycle_mult / 100;
1312 static void intShutdown() {
1315 // single step (may do several ops in case of a branch)
1317 execI_(psxMemRLUT, &psxRegs);
1320 R3000Acpu psxInt = {