1 /***************************************************************************
2 * Copyright (C) 2007 Ryan Schultz, PCSX-df Team, PCSX team *
3 * Copyright (C) 2023 notaz *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. *
19 ***************************************************************************/
22 * PSX assembly interpreter.
25 #include "psxcommon.h"
29 #include "psxinterpreter.h"
32 #include "../include/compiler_features.h"
34 // these may cause issues: because of poor timing we may step
35 // on instructions that real hardware would never reach
36 #define DO_EXCEPTION_RESERVEDI
37 #define DO_EXCEPTION_ALIGNMENT_BRANCH
38 //#define DO_EXCEPTION_ALIGNMENT_DATA
39 #define HANDLE_LOAD_DELAY
41 static int branch = 0;
42 static int branch2 = 0;
45 #define INT_ATTR __attribute__((regparm(2)))
50 #define INVALID_PTR NULL
54 static void (INT_ATTR *psxBSC[64])(psxRegisters *regs_, u32 code);
55 static void (INT_ATTR *psxSPC[64])(psxRegisters *regs_, u32 code);
58 static void doLoad(psxRegisters *regs, u32 r, u32 val)
60 #ifdef HANDLE_LOAD_DELAY
61 int sel = regs->dloadSel ^ 1;
62 assert(regs->dloadReg[sel] == 0);
63 regs->dloadReg[sel] = r;
64 regs->dloadVal[sel] = r ? val : 0;
65 if (regs->dloadReg[sel ^ 1] == r)
66 regs->dloadVal[sel ^ 1] = regs->dloadReg[sel ^ 1] = 0;
68 regs->GPR.r[r] = r ? val : 0;
72 static void dloadRt(psxRegisters *regs, u32 r, u32 val)
74 #ifdef HANDLE_LOAD_DELAY
75 int sel = regs->dloadSel;
76 if (unlikely(regs->dloadReg[sel] == r))
77 regs->dloadVal[sel] = regs->dloadReg[sel] = 0;
79 regs->GPR.r[r] = r ? val : 0;
82 static void dloadStep(psxRegisters *regs)
84 #ifdef HANDLE_LOAD_DELAY
85 int sel = regs->dloadSel;
86 regs->GPR.r[regs->dloadReg[sel]] = regs->dloadVal[sel];
87 regs->dloadVal[sel] = regs->dloadReg[sel] = 0;
89 assert(regs->GPR.r[0] == 0);
93 static void dloadFlush(psxRegisters *regs)
95 #ifdef HANDLE_LOAD_DELAY
96 regs->GPR.r[regs->dloadReg[0]] = regs->dloadVal[0];
97 regs->GPR.r[regs->dloadReg[1]] = regs->dloadVal[1];
98 regs->dloadVal[0] = regs->dloadVal[1] = 0;
99 regs->dloadReg[0] = regs->dloadReg[1] = 0;
100 assert(regs->GPR.r[0] == 0);
104 static void dloadClear(psxRegisters *regs)
106 #ifdef HANDLE_LOAD_DELAY
107 regs->dloadVal[0] = regs->dloadVal[1] = 0;
108 regs->dloadReg[0] = regs->dloadReg[1] = 0;
113 static void intException(psxRegisters *regs, u32 pc, u32 cause)
117 psxException(cause, branch, ®s->CP0);
120 // get an opcode without triggering exceptions or affecting cache
121 u32 intFakeFetch(u32 pc)
123 u8 *base = psxMemRLUT[pc >> 16];
125 if (unlikely(base == INVALID_PTR))
127 code = (u32 *)(base + (pc & 0xfffc));
128 return SWAP32(*code);
132 static u32 INT_ATTR fetchNoCache(psxRegisters *regs, u8 **memRLUT, u32 pc)
134 u8 *base = memRLUT[pc >> 16];
136 if (unlikely(base == INVALID_PTR)) {
137 SysPrintf("game crash @%08x, ra=%08x\n", pc, regs->GPR.n.ra);
138 intException(regs, pc, R3000E_IBE << 2);
139 return 0; // execute as nop
141 code = (u32 *)(base + (pc & 0xfffc));
142 return SWAP32(*code);
147 Use old CPU cache code when the RAM location is updated with new code (affects in-game racing)
149 static struct cache_entry {
154 static u32 INT_ATTR fetchICache(psxRegisters *regs, u8 **memRLUT, u32 pc)
159 // this is not how the hardware works but whatever
160 struct cache_entry *entry = &ICache[(pc & 0xff0) >> 4];
162 if (((entry->tag ^ pc) & 0xfffffff0) != 0 || pc < entry->tag)
164 const u8 *base = memRLUT[pc >> 16];
166 if (unlikely(base == INVALID_PTR)) {
167 SysPrintf("game crash @%08x, ra=%08x\n", pc, regs->GPR.n.ra);
168 intException(regs, pc, R3000E_IBE << 2);
169 return 0; // execute as nop
171 code = (u32 *)(base + (pc & 0xfff0));
174 // treat as 4 words, although other configurations are said to be possible
177 case 0x00: entry->data[0] = SWAP32(code[0]);
178 case 0x04: entry->data[1] = SWAP32(code[1]);
179 case 0x08: entry->data[2] = SWAP32(code[2]);
180 case 0x0c: entry->data[3] = SWAP32(code[3]);
183 return entry->data[(pc & 0x0f) >> 2];
186 return fetchNoCache(regs, memRLUT, pc);
189 static u32 (INT_ATTR *fetch)(psxRegisters *regs_, u8 **memRLUT, u32 pc) = fetchNoCache;
191 // Make the timing events trigger faster as we are currently assuming everything
192 // takes one cycle, which is not the case on real hardware.
193 // FIXME: count cache misses, memory latencies, stalls to get rid of this
194 static inline void addCycle(void)
196 assert(psxRegs.subCycleStep >= 0x10000);
197 psxRegs.subCycle += psxRegs.subCycleStep;
198 psxRegs.cycle += psxRegs.subCycle >> 16;
199 psxRegs.subCycle &= 0xffff;
202 /**** R3000A Instruction Macros ****/
203 #define _PC_ regs_->pc // The next PC to be executed
// Field extractors for a raw 32-bit instruction word.  Every use of the
// argument is parenthesized so that an expression (e.g. `a | b`, `x >> 16`)
// can be passed safely.
#define _fOp_(code)     ((code) >> 26)           // The opcode part of the instruction register
#define _fFunct_(code)  ((code) & 0x3F)          // The funct part of the instruction register
#define _fRd_(code)     (((code) >> 11) & 0x1F)  // The rd part of the instruction register
#define _fRt_(code)     (((code) >> 16) & 0x1F)  // The rt part of the instruction register
#define _fRs_(code)     (((code) >> 21) & 0x1F)  // The rs part of the instruction register
#define _fSa_(code)     (((code) >>  6) & 0x1F)  // The sa part of the instruction register
#define _fIm_(code)     ((u16)(code))            // The immediate part of the instruction register
#define _fTarget_(code) ((code) & 0x03ffffff)    // The target part of the instruction register

#define _fImm_(code)    ((s16)(code))            // sign-extended immediate
#define _fImmU_(code)   ((code) & 0xffff)        // zero-extended immediate
217 #define _Op_ _fOp_(code)
218 #define _Funct_ _fFunct_(code)
219 #define _Rd_ _fRd_(code)
220 #define _Rt_ _fRt_(code)
221 #define _Rs_ _fRs_(code)
222 #define _Sa_ _fSa_(code)
223 #define _Im_ _fIm_(code)
224 #define _Target_ _fTarget_(code)
226 #define _Imm_ _fImm_(code)
227 #define _ImmU_ _fImmU_(code)
229 #define _rRs_ regs_->GPR.r[_Rs_] // Rs register
230 #define _rRt_ regs_->GPR.r[_Rt_] // Rt register
231 #define _rSa_ regs_->GPR.r[_Sa_] // Sa register
233 #define _rHi_ regs_->GPR.n.hi // The HI register
234 #define _rLo_ regs_->GPR.n.lo // The LO register
236 #define _JumpTarget_ ((_Target_ * 4) + (_PC_ & 0xf0000000)) // Calculates the target during a jump instruction
237 #define _BranchTarget_ ((s16)_Im_ * 4 + _PC_) // Calculates the target during a branch instruction
239 #define _SetLink(x) dloadRt(regs_, x, _PC_ + 4); // Sets the return address in the link register
242 static inline INT_ATTR void name(psxRegisters *regs_, u32 code)
244 // these defines shall be used with the tmp
245 // of the next func (instead of _Funct_...)
246 #define _tFunct_ ((tmp ) & 0x3F) // The funct part of the instruction register
247 #define _tRd_ ((tmp >> 11) & 0x1F) // The rd part of the instruction register
248 #define _tRt_ ((tmp >> 16) & 0x1F) // The rt part of the instruction register
249 #define _tRs_ ((tmp >> 21) & 0x1F) // The rs part of the instruction register
250 #define _tSa_ ((tmp >> 6) & 0x1F) // The sa part of the instruction register
// Reinterpret a value as signed / unsigned 32 bit.  The whole expansion is
// parenthesized so the cast cannot bind to a neighbouring operator.
#define _i32(x) ((s32)(x))
#define _u32(x) ((u32)(x))

// Non-zero when opcode word c_ is a branch or jump: primary opcodes 1..7,
// or a SPECIAL (opcode 0) word whose funct field is 8 or 9.
// Note: c_ is evaluated more than once.
#define isBranch(c_) \
	((1 <= ((c_) >> 26) && ((c_) >> 26) <= 7) || ((c_) & 0xfc00003e) == 8)
// Exchange two u32 lvalues.
#define swap_(a_, b_) { u32 t_ = (a_); (a_) = (b_); (b_) = t_; }
259 // tar1 is main branch target, 'code' is opcode in DS
260 static u32 psxBranchNoDelay(psxRegisters *regs_, u32 tar1, u32 code, int *taken) {
263 assert(isBranch(code));
265 switch (code >> 26) {
266 case 0x00: // SPECIAL
273 regs_->GPR.r[_Rd_] = tar1 + 4;
281 regs_->GPR.n.ra = tar1 + 4;
283 return tar1 + (s16)_Im_ * 4;
286 regs_->GPR.n.ra = tar1 + 4;
287 if (_i32(_rRs_) >= 0)
288 return tar1 + (s16)_Im_ * 4;
291 if (rt & 1) { // BGEZ
292 if (_i32(_rRs_) >= 0)
293 return tar1 + (s16)_Im_ * 4;
297 return tar1 + (s16)_Im_ * 4;
303 return (tar1 & 0xf0000000u) + _Target_ * 4;
305 regs_->GPR.n.ra = tar1 + 4;
306 return (tar1 & 0xf0000000u) + _Target_ * 4;
308 if (_i32(_rRs_) == _i32(_rRt_))
309 return tar1 + (s16)_Im_ * 4;
312 if (_i32(_rRs_) != _i32(_rRt_))
313 return tar1 + (s16)_Im_ * 4;
316 if (_i32(_rRs_) <= 0)
317 return tar1 + (s16)_Im_ * 4;
321 return tar1 + (s16)_Im_ * 4;
329 static void psxDoDelayBranch(psxRegisters *regs, u32 tar1, u32 code1) {
333 tar2 = psxBranchNoDelay(regs, tar1, code1, &taken);
339 * taken branch in delay slot:
340 * - execute 1 instruction at tar1
341 * - jump to tar2 (target of branch in delay slot; this branch
342 * has no normal delay slot, instruction at tar1 was fetched instead)
344 for (lim = 0; lim < 8; lim++) {
345 regs->code = code = fetch(regs, psxMemRLUT, tar1);
347 if (likely(!isBranch(code))) {
349 psxBSC[code >> 26](regs, code);
353 tar1 = psxBranchNoDelay(regs, tar2, code, &taken);
359 SysPrintf("Evil chained DS branches @ %08x %08x %08x\n", regs->pc, tar1, tar2);
362 static void doBranch(psxRegisters *regs, u32 tar) {
365 branch2 = branch = 1;
367 // fetch the delay slot
370 regs->code = code = fetch(regs, psxMemRLUT, pc);
374 // check for branch in delay slot
375 if (unlikely(isBranch(code))) {
376 psxDoDelayBranch(regs, tar, code);
377 log_unhandled("branch in DS: %08x->%08x\n", pc, regs->pc);
384 psxBSC[code >> 26](regs, code);
392 static void doBranchReg(psxRegisters *regs, u32 tar) {
393 #ifdef DO_EXCEPTION_ALIGNMENT_BRANCH
394 if (unlikely(tar & 3)) {
395 SysPrintf("game crash @%08x, ra=%08x\n", tar, regs->GPR.n.ra);
396 psxRegs.CP0.n.BadVAddr = tar;
397 intException(regs, tar, R3000E_AdEL << 2);
// add_overflow(a, b, r) / sub_overflow(a, b, r):
// store a + b (or a - b) into the lvalue r and evaluate to non-zero when the
// signed 32-bit operation overflowed.
#if __has_builtin(__builtin_add_overflow) || (defined(__GNUC__) && __GNUC__ >= 5)
#define add_overflow(a, b, r) __builtin_add_overflow(a, b, &(r))
#define sub_overflow(a, b, r) __builtin_sub_overflow(a, b, &(r))
#else
// Fallback: operands are captured once and parenthesized.
// Addition overflows when both operands share a sign that the result lacks;
// subtraction overflows when the operand signs differ and the result's sign
// differs from the first operand.
#define add_overflow(a, b, r) ({ \
	u32 a_ = (u32)(a), b_ = (u32)(b); \
	(r) = a_ + b_; \
	((a_ ^ ~b_) & (a_ ^ (u32)(r)) & (1u << 31)) != 0; \
})
#define sub_overflow(a, b, r) ({ \
	u32 a_ = (u32)(a), b_ = (u32)(b); \
	(r) = a_ - b_; \
	((a_ ^ b_) & (a_ ^ (u32)(r)) & (1u << 31)) != 0; \
})
#endif
414 static void addExc(psxRegisters *regs, u32 rt, s32 a1, s32 a2) {
416 if (add_overflow(a1, a2, val)) {
417 //printf("ov %08x + %08x = %08x\n", a1, a2, val);
418 intException(regs, regs->pc - 4, R3000E_Ov << 2);
421 dloadRt(regs, rt, val);
424 static void subExc(psxRegisters *regs, u32 rt, s32 a1, s32 a2) {
426 if (sub_overflow(a1, a2, val)) {
427 intException(regs, regs->pc - 4, R3000E_Ov << 2);
430 dloadRt(regs, rt, val);
433 /*********************************************************
434 * Arithmetic with immediate operand *
435 * Format: OP rt, rs, immediate *
436 *********************************************************/
437 OP(psxADDI) { addExc (regs_, _Rt_, _i32(_rRs_), _Imm_); } // Rt = Rs + Im (Exception on Integer Overflow)
438 OP(psxADDIU) { dloadRt(regs_, _Rt_, _u32(_rRs_) + _Imm_ ); } // Rt = Rs + Im
439 OP(psxANDI) { dloadRt(regs_, _Rt_, _u32(_rRs_) & _ImmU_); } // Rt = Rs And Im
440 OP(psxORI) { dloadRt(regs_, _Rt_, _u32(_rRs_) | _ImmU_); } // Rt = Rs Or Im
441 OP(psxXORI) { dloadRt(regs_, _Rt_, _u32(_rRs_) ^ _ImmU_); } // Rt = Rs Xor Im
442 OP(psxSLTI) { dloadRt(regs_, _Rt_, _i32(_rRs_) < _Imm_ ); } // Rt = Rs < Im (Signed)
443 OP(psxSLTIU) { dloadRt(regs_, _Rt_, _u32(_rRs_) < ((u32)_Imm_)); } // Rt = Rs < Im (Unsigned)
445 /*********************************************************
446 * Register arithmetic *
447 * Format: OP rd, rs, rt *
448 *********************************************************/
449 OP(psxADD) { addExc (regs_, _Rd_, _i32(_rRs_), _i32(_rRt_)); } // Rd = Rs + Rt (Exception on Integer Overflow)
450 OP(psxSUB) { subExc (regs_, _Rd_, _i32(_rRs_), _i32(_rRt_)); } // Rd = Rs - Rt (Exception on Integer Overflow)
451 OP(psxADDU) { dloadRt(regs_, _Rd_, _u32(_rRs_) + _u32(_rRt_)); } // Rd = Rs + Rt
452 OP(psxSUBU) { dloadRt(regs_, _Rd_, _u32(_rRs_) - _u32(_rRt_)); } // Rd = Rs - Rt
453 OP(psxAND) { dloadRt(regs_, _Rd_, _u32(_rRs_) & _u32(_rRt_)); } // Rd = Rs And Rt
454 OP(psxOR) { dloadRt(regs_, _Rd_, _u32(_rRs_) | _u32(_rRt_)); } // Rd = Rs Or Rt
455 OP(psxXOR) { dloadRt(regs_, _Rd_, _u32(_rRs_) ^ _u32(_rRt_)); } // Rd = Rs Xor Rt
456 OP(psxNOR) { dloadRt(regs_, _Rd_, ~_u32(_rRs_ | _u32(_rRt_))); } // Rd = Rs Nor Rt
457 OP(psxSLT) { dloadRt(regs_, _Rd_, _i32(_rRs_) < _i32(_rRt_)); } // Rd = Rs < Rt (Signed)
458 OP(psxSLTU) { dloadRt(regs_, _Rd_, _u32(_rRs_) < _u32(_rRt_)); } // Rd = Rs < Rt (Unsigned)
460 /*********************************************************
461 * Register mult/div & Register trap logic *
462 * Format: OP rs, rt *
463 *********************************************************/
467 if (_rRs_ & 0x80000000) {
473 #if !defined(__arm__) && !defined(__aarch64__)
474 else if (_rRs_ == 0x80000000 && _rRt_ == 0xFFFFFFFF) {
480 _rLo_ = _i32(_rRs_) / _i32(_rRt_);
481 _rHi_ = _i32(_rRs_) % _i32(_rRt_);
486 regs_->muldivBusyCycle = regs_->cycle + 37;
492 _rLo_ = _rRs_ / _rRt_;
493 _rHi_ = _rRs_ % _rRt_;
502 regs_->muldivBusyCycle = regs_->cycle + 37;
503 psxDIVU(regs_, code);
507 u64 res = (s64)_i32(_rRs_) * _i32(_rRt_);
509 regs_->GPR.n.lo = (u32)res;
510 regs_->GPR.n.hi = (u32)(res >> 32);
514 // approximate, but maybe good enough
516 u32 lz = __builtin_clz(((rs ^ ((s32)rs >> 21)) | 1));
517 u32 c = 7 + (2 - (lz / 11)) * 4;
518 regs_->muldivBusyCycle = regs_->cycle + c;
519 psxMULT(regs_, code);
523 u64 res = (u64)_u32(_rRs_) * _u32(_rRt_);
525 regs_->GPR.n.lo = (u32)(res & 0xffffffff);
526 regs_->GPR.n.hi = (u32)((res >> 32) & 0xffffffff);
530 // approximate, but maybe good enough
531 u32 lz = __builtin_clz(_rRs_ | 1);
532 u32 c = 7 + (2 - (lz / 11)) * 4;
533 regs_->muldivBusyCycle = regs_->cycle + c;
534 psxMULTU(regs_, code);
537 /*********************************************************
538 * Register branch logic *
539 * Format: OP rs, offset *
540 *********************************************************/
541 #define RepZBranchi32(op) \
542 if(_i32(_rRs_) op 0) \
543 doBranch(regs_, _BranchTarget_);
544 #define RepZBranchLinki32(op) { \
545 s32 temp = _i32(_rRs_); \
548 doBranch(regs_, _BranchTarget_); \
551 OP(psxBGEZ) { RepZBranchi32(>=) } // Branch if Rs >= 0
552 OP(psxBGEZAL) { RepZBranchLinki32(>=) } // Branch if Rs >= 0 and link
553 OP(psxBGTZ) { RepZBranchi32(>) } // Branch if Rs > 0
554 OP(psxBLEZ) { RepZBranchi32(<=) } // Branch if Rs <= 0
555 OP(psxBLTZ) { RepZBranchi32(<) } // Branch if Rs < 0
556 OP(psxBLTZAL) { RepZBranchLinki32(<) } // Branch if Rs < 0 and link
558 /*********************************************************
559 * Shift arithmetic with constant shift *
560 * Format: OP rd, rt, sa *
561 *********************************************************/
562 OP(psxSLL) { dloadRt(regs_, _Rd_, _u32(_rRt_) << _Sa_); } // Rd = Rt << sa
563 OP(psxSRA) { dloadRt(regs_, _Rd_, _i32(_rRt_) >> _Sa_); } // Rd = Rt >> sa (arithmetic)
564 OP(psxSRL) { dloadRt(regs_, _Rd_, _u32(_rRt_) >> _Sa_); } // Rd = Rt >> sa (logical)
566 /*********************************************************
567 * Shift arithmetic with variant register shift *
568 * Format: OP rd, rt, rs *
569 *********************************************************/
570 OP(psxSLLV) { dloadRt(regs_, _Rd_, _u32(_rRt_) << (_u32(_rRs_) & 0x1F)); } // Rd = Rt << rs
571 OP(psxSRAV) { dloadRt(regs_, _Rd_, _i32(_rRt_) >> (_u32(_rRs_) & 0x1F)); } // Rd = Rt >> rs (arithmetic)
572 OP(psxSRLV) { dloadRt(regs_, _Rd_, _u32(_rRt_) >> (_u32(_rRs_) & 0x1F)); } // Rd = Rt >> rs (logical)
574 /*********************************************************
575 * Load higher 16 bits of the first word in GPR with imm *
576 * Format: OP rt, immediate *
577 *********************************************************/
578 OP(psxLUI) { dloadRt(regs_, _Rt_, code << 16); } // Upper halfword of Rt = Im
580 /*********************************************************
581 * Move from HI/LO to GPR *
583 *********************************************************/
584 OP(psxMFHI) { dloadRt(regs_, _Rd_, _rHi_); } // Rd = Hi
585 OP(psxMFLO) { dloadRt(regs_, _Rd_, _rLo_); } // Rd = Lo
587 static void mflohiCheckStall(psxRegisters *regs_)
589 u32 left = regs_->muldivBusyCycle - regs_->cycle;
591 //printf("muldiv stall %u\n", left);
592 regs_->cycle = regs_->muldivBusyCycle;
596 OP(psxMFHI_stall) { mflohiCheckStall(regs_); psxMFHI(regs_, code); }
597 OP(psxMFLO_stall) { mflohiCheckStall(regs_); psxMFLO(regs_, code); }
599 /*********************************************************
600 * Move from GPR to HI/LO & Register jump *
602 *********************************************************/
603 OP(psxMTHI) { _rHi_ = _rRs_; } // Hi = Rs
604 OP(psxMTLO) { _rLo_ = _rRs_; } // Lo = Rs
606 /*********************************************************
607 * Special purpose instructions *
609 *********************************************************/
611 intException(regs_, regs_->pc - 4, R3000E_Bp << 2);
615 intException(regs_, regs_->pc - 4, R3000E_Syscall << 2);
618 static inline void execI_(u8 **memRLUT, psxRegisters *regs_);
620 static inline void psxTestSWInts(psxRegisters *regs_, int step) {
621 if (regs_->CP0.n.Cause & regs_->CP0.n.Status & 0x0300 &&
622 regs_->CP0.n.Status & 0x1) {
624 execI_(psxMemRLUT, regs_);
625 regs_->CP0.n.Cause &= ~0x7c;
626 intException(regs_, regs_->pc, regs_->CP0.n.Cause);
631 // SysPrintf("psxRFE\n");
632 regs_->CP0.n.Status = (regs_->CP0.n.Status & 0xfffffff0) |
633 ((regs_->CP0.n.Status & 0x3c) >> 2);
634 psxTestSWInts(regs_, 0);
637 /*********************************************************
638 * Register branch logic *
639 * Format: OP rs, rt, offset *
640 *********************************************************/
641 #define RepBranchi32(op) { \
642 if (_i32(_rRs_) op _i32(_rRt_)) \
643 doBranch(regs_, _BranchTarget_); \
646 OP(psxBEQ) { RepBranchi32(==) } // Branch if Rs == Rt
647 OP(psxBNE) { RepBranchi32(!=) } // Branch if Rs != Rt
649 /*********************************************************
651 * Format: OP target *
652 *********************************************************/
653 OP(psxJ) { doBranch(regs_, _JumpTarget_); }
654 OP(psxJAL) { _SetLink(31); doBranch(regs_, _JumpTarget_); }
656 /*********************************************************
658 * Format: OP rs, rd *
659 *********************************************************/
661 doBranchReg(regs_, _rRs_);
666 u32 temp = _u32(_rRs_);
667 if (_Rd_) { _SetLink(_Rd_); }
668 doBranchReg(regs_, temp);
671 /*********************************************************
672 * Load and store for GPR *
673 * Format: OP rt, offset(base) *
674 *********************************************************/
676 static int algnChkL(psxRegisters *regs, u32 addr, u32 m) {
677 if (unlikely(addr & m)) {
678 log_unhandled("unaligned load %08x @%08x\n", addr, regs->pc - 4);
679 #ifdef DO_EXCEPTION_ALIGNMENT_DATA
680 psxRegs.CP0.n.BadVAddr = addr;
681 intException(regs, regs->pc - 4, R3000E_AdEL << 2);
688 static int algnChkS(psxRegisters *regs, u32 addr, u32 m) {
689 if (unlikely(addr & m)) {
690 log_unhandled("unaligned store %08x @%08x\n", addr, regs->pc - 4);
691 #ifdef DO_EXCEPTION_ALIGNMENT_DATA
692 psxRegs.CP0.n.BadVAddr = addr;
693 intException(regs, regs->pc - 4, R3000E_AdES << 2);
700 /*********************************************************
701 * Load and store for GPR *
702 * Format: OP rt, offset(base) *
703 *********************************************************/
705 #define _oB_ (regs_->GPR.r[_Rs_] + _Imm_)
707 OP(psxLB) { doLoad(regs_, _Rt_, (s8)psxMemRead8(_oB_)); }
708 OP(psxLBU) { doLoad(regs_, _Rt_, psxMemRead8(_oB_)); }
709 OP(psxLH) { if (algnChkL(regs_, _oB_, 1)) doLoad(regs_, _Rt_, (s16)psxMemRead16(_oB_)); }
710 OP(psxLHU) { if (algnChkL(regs_, _oB_, 1)) doLoad(regs_, _Rt_, psxMemRead16(_oB_)); }
711 OP(psxLW) { if (algnChkL(regs_, _oB_, 3)) doLoad(regs_, _Rt_, psxMemRead32(_oB_)); }
714 static const u32 LWL_MASK[4] = { 0xffffff, 0xffff, 0xff, 0 };
715 static const u32 LWL_SHIFT[4] = { 24, 16, 8, 0 };
716 u32 addr = _oB_, val;
717 u32 shift = addr & 3;
718 u32 mem = psxMemRead32(addr & ~3);
720 u32 oldval = regs_->GPR.r[rt];
722 #ifdef HANDLE_LOAD_DELAY
723 int sel = regs_->dloadSel;
724 if (regs_->dloadReg[sel] == rt)
725 oldval = regs_->dloadVal[sel];
727 val = (oldval & LWL_MASK[shift]) | (mem << LWL_SHIFT[shift]);
728 doLoad(regs_, rt, val);
731 Mem = 1234. Reg = abcd
733 0 4bcd (mem << 24) | (reg & 0x00ffffff)
734 1 34cd (mem << 16) | (reg & 0x0000ffff)
735 2 234d (mem << 8) | (reg & 0x000000ff)
736 3 1234 (mem ) | (reg & 0x00000000)
741 static const u32 LWR_MASK[4] = { 0, 0xff000000, 0xffff0000, 0xffffff00 };
742 static const u32 LWR_SHIFT[4] = { 0, 8, 16, 24 };
743 u32 addr = _oB_, val;
744 u32 shift = addr & 3;
745 u32 mem = psxMemRead32(addr & ~3);
747 u32 oldval = regs_->GPR.r[rt];
749 #ifdef HANDLE_LOAD_DELAY
750 int sel = regs_->dloadSel;
751 if (regs_->dloadReg[sel] == rt)
752 oldval = regs_->dloadVal[sel];
754 val = (oldval & LWR_MASK[shift]) | (mem >> LWR_SHIFT[shift]);
755 doLoad(regs_, rt, val);
758 Mem = 1234. Reg = abcd
760 0 1234 (mem ) | (reg & 0x00000000)
761 1 a123 (mem >> 8) | (reg & 0xff000000)
762 2 ab12 (mem >> 16) | (reg & 0xffff0000)
763 3 abc1 (mem >> 24) | (reg & 0xffffff00)
767 OP(psxSB) { psxMemWrite8 (_oB_, _rRt_ & 0xff); }
768 OP(psxSH) { if (algnChkS(regs_, _oB_, 1)) psxMemWrite16(_oB_, _rRt_ & 0xffff); }
769 OP(psxSW) { if (algnChkS(regs_, _oB_, 3)) psxMemWrite32(_oB_, _rRt_); }
771 // FIXME: this rmw implementation is wrong and would break on io like fifos
773 static const u32 SWL_MASK[4] = { 0xffffff00, 0xffff0000, 0xff000000, 0 };
774 static const u32 SWL_SHIFT[4] = { 24, 16, 8, 0 };
776 u32 shift = addr & 3;
777 u32 mem = psxMemRead32(addr & ~3);
779 psxMemWrite32(addr & ~3, (_u32(_rRt_) >> SWL_SHIFT[shift]) |
780 ( mem & SWL_MASK[shift]) );
782 Mem = 1234. Reg = abcd
784 0 123a (reg >> 24) | (mem & 0xffffff00)
785 1 12ab (reg >> 16) | (mem & 0xffff0000)
786 2 1abc (reg >> 8) | (mem & 0xff000000)
787 3 abcd (reg ) | (mem & 0x00000000)
792 static const u32 SWR_MASK[4] = { 0, 0xff, 0xffff, 0xffffff };
793 static const u32 SWR_SHIFT[4] = { 0, 8, 16, 24 };
795 u32 shift = addr & 3;
796 u32 mem = psxMemRead32(addr & ~3);
798 psxMemWrite32(addr & ~3, (_u32(_rRt_) << SWR_SHIFT[shift]) |
799 ( mem & SWR_MASK[shift]) );
802 Mem = 1234. Reg = abcd
804 0 abcd (reg ) | (mem & 0x00000000)
805 1 bcd4 (reg << 8) | (mem & 0x000000ff)
806 2 cd34 (reg << 16) | (mem & 0x0000ffff)
807 3 d234 (reg << 24) | (mem & 0x00ffffff)
811 /*********************************************************
812 * Moves between GPR and COPx *
813 * Format: OP rt, fs *
814 *********************************************************/
817 #ifdef DO_EXCEPTION_RESERVEDI
818 if (unlikely(r == 0))
819 intException(regs_, regs_->pc - 4, R3000E_RI << 2);
821 doLoad(regs_, _Rt_, regs_->CP0.r[r]);
824 OP(psxCFC0) { doLoad(regs_, _Rt_, regs_->CP0.r[_Rd_]); }
826 static void setupCop(u32 sr);
828 void MTC0(psxRegisters *regs_, int reg, u32 val) {
829 // SysPrintf("MTC0 %d: %x\n", reg, val);
832 if (unlikely((regs_->CP0.n.Status ^ val) & (1 << 16)))
833 psxMemOnIsolate((val >> 16) & 1);
834 if (unlikely((regs_->CP0.n.Status ^ val) & (7 << 29)))
836 regs_->CP0.n.Status = val;
837 psxTestSWInts(regs_, 1);
841 regs_->CP0.n.Cause &= ~0x0300;
842 regs_->CP0.n.Cause |= val & 0x0300;
843 psxTestSWInts(regs_, 0);
847 regs_->CP0.r[reg] = val;
852 OP(psxMTC0) { MTC0(regs_, _Rd_, _u32(_rRt_)); }
853 OP(psxCTC0) { MTC0(regs_, _Rd_, _u32(_rRt_)); }
855 /*********************************************************
856 * Unknown instruction (would generate an exception) *
858 *********************************************************/
859 static inline void psxNULL_(void) {
860 //printf("op %08x @%08x\n", psxRegs.code, psxRegs.pc);
865 #ifdef DO_EXCEPTION_RESERVEDI
866 intException(regs_, regs_->pc - 4, R3000E_RI << 2);
870 void gteNULL(struct psxCP2Regs *regs) {
875 psxSPC[_Funct_](regs_, code);
880 case 0x00: psxMFC0(regs_, code); break;
881 case 0x02: psxCFC0(regs_, code); break;
882 case 0x04: psxMTC0(regs_, code); break;
883 case 0x06: psxCTC0(regs_, code); break;
884 case 0x10: psxRFE(regs_, code); break;
885 default: psxNULL_(); break;
890 // MTC0(regs_, _Rt_, psxMemRead32(_oB_)); // ?
891 log_unhandled("LWC0 %08x\n", code);
895 // ??? what actually happens here?
899 #ifdef DO_EXCEPTION_RESERVEDI
900 intException(regs_, regs_->pc - 4, (1<<28) | (R3000E_RI << 2));
905 psxCP2[_Funct_](®s_->CP2);
911 psxCP2[f](®s_->CP2);
915 #ifdef DO_EXCEPTION_RESERVEDI
916 intException(regs_, regs_->pc - 4, (2<<28) | (R3000E_RI << 2));
921 doLoad(regs_, _Rt_, MFC2(®s_->CP2, _Rd_));
925 doLoad(regs_, _Rt_, regs_->CP2C.r[_Rd_]);
929 MTC2(®s_->CP2, regs_->GPR.r[_Rt_], _Rd_);
933 CTC2(®s_->CP2, regs_->GPR.r[_Rt_], _Rd_);
937 MTC2(®s_->CP2, psxMemRead32(_oB_), _Rt_);
941 psxMemWrite32(_oB_, MFC2(®s_->CP2, _Rt_));
946 gteLWC2(regs_, code);
951 gteSWC2(regs_, code);
955 // ??? what actually happens here?
959 #ifdef DO_EXCEPTION_RESERVEDI
960 intException(regs_, regs_->pc - 4, (3<<28) | (R3000E_RI << 2));
965 // does this read memory?
966 log_unhandled("LWCx %08x\n", code);
970 // does this write something to memory?
971 log_unhandled("SWCx %08x\n", code);
974 static void psxBASIC(struct psxCP2Regs *cp2regs) {
975 psxRegisters *regs_ = (void *)((char *)cp2regs - offsetof(psxRegisters, CP2));
976 u32 code = regs_->code;
977 assert(regs_ == &psxRegs);
979 case 0x00: gteMFC2(regs_, code); break;
980 case 0x02: gteCFC2(regs_, code); break;
981 case 0x04: gteMTC2(regs_, code); break;
982 case 0x06: gteCTC2(regs_, code); break;
983 default: psxNULL_(); break;
990 case 0x10: psxBLTZAL(regs_, code); break;
991 case 0x11: psxBGEZAL(regs_, code); break;
994 psxBGEZ(regs_, code);
996 psxBLTZ(regs_, code);
1002 if (unlikely(!Config.HLE)) {
1003 psxSWCx(regs_, code);
1006 hleCode = code & 0x03ffffff;
1007 if (hleCode >= (sizeof(psxHLEt) / sizeof(psxHLEt[0]))) {
1008 psxSWCx(regs_, code);
1014 static void (INT_ATTR *psxBSC[64])(psxRegisters *regs_, u32 code) = {
1015 psxSPECIAL, psxREGIMM, psxJ , psxJAL , psxBEQ , psxBNE , psxBLEZ, psxBGTZ,
1016 psxADDI , psxADDIU , psxSLTI, psxSLTIU, psxANDI, psxORI , psxXORI, psxLUI ,
1017 psxCOP0 , psxCOP1d , psxCOP2, psxCOP3d, psxNULL, psxCOP1d,psxCOP2d,psxCOP3d,
1018 psxNULL , psxCOP1d , psxCOP2d,psxCOP3d, psxNULL, psxCOP1d,psxCOP2d,psxCOP3d,
1019 psxLB , psxLH , psxLWL , psxLW , psxLBU , psxLHU , psxLWR , psxCOP3d,
1020 psxSB , psxSH , psxSWL , psxSW , psxNULL, psxCOP1d,psxSWR , psxCOP3d,
1021 psxLWC0 , psxLWCx , gteLWC2, psxLWCx , psxNULL, psxCOP1d,psxCOP2d,psxCOP3d,
1022 psxSWCx , psxSWCx , gteSWC2, psxHLE , psxNULL, psxCOP1d,psxCOP2d,psxCOP3d,
1025 static void (INT_ATTR *psxSPC[64])(psxRegisters *regs_, u32 code) = {
1026 psxSLL , psxNULL , psxSRL , psxSRA , psxSLLV , psxNULL , psxSRLV, psxSRAV,
1027 psxJR , psxJALR , psxNULL, psxNULL, psxSYSCALL, psxBREAK, psxNULL, psxNULL,
1028 psxMFHI, psxMTHI , psxMFLO, psxMTLO, psxNULL , psxNULL , psxNULL, psxNULL,
1029 psxMULT, psxMULTU, psxDIV , psxDIVU, psxNULL , psxNULL , psxNULL, psxNULL,
1030 psxADD , psxADDU , psxSUB , psxSUBU, psxAND , psxOR , psxXOR , psxNOR ,
1031 psxNULL, psxNULL , psxSLT , psxSLTU, psxNULL , psxNULL , psxNULL, psxNULL,
1032 psxNULL, psxNULL , psxNULL, psxNULL, psxNULL , psxNULL , psxNULL, psxNULL,
1033 psxNULL, psxNULL , psxNULL, psxNULL, psxNULL , psxNULL , psxNULL, psxNULL
1036 void (*psxCP2[64])(struct psxCP2Regs *regs) = {
1037 psxBASIC, gteRTPS , gteNULL , gteNULL, gteNULL, gteNULL , gteNCLIP, gteNULL, // 00
1038 gteNULL , gteNULL , gteNULL , gteNULL, gteOP , gteNULL , gteNULL , gteNULL, // 08
1039 gteDPCS , gteINTPL, gteMVMVA, gteNCDS, gteCDP , gteNULL , gteNCDT , gteNULL, // 10
1040 gteNULL , gteNULL , gteNULL , gteNCCS, gteCC , gteNULL , gteNCS , gteNULL, // 18
1041 gteNCT , gteNULL , gteNULL , gteNULL, gteNULL, gteNULL , gteNULL , gteNULL, // 20
1042 gteSQR , gteDCPL , gteDPCT , gteNULL, gteNULL, gteAVSZ3, gteAVSZ4, gteNULL, // 28
1043 gteRTPT , gteNULL , gteNULL , gteNULL, gteNULL, gteNULL , gteNULL , gteNULL, // 30
1044 gteNULL , gteNULL , gteNULL , gteNULL, gteNULL, gteGPF , gteGPL , gteNCCT // 38
1047 ///////////////////////////////////////////
1049 static int intInit() {
1053 static void intReset() {
1054 dloadClear(&psxRegs);
1057 static inline void execI_(u8 **memRLUT, psxRegisters *regs_) {
1060 regs_->code = fetch(regs_, memRLUT, pc);
1065 psxBSC[regs_->code >> 26](regs_, regs_->code);
1068 static void intExecute() {
1069 psxRegisters *regs_ = &psxRegs;
1070 u8 **memRLUT = psxMemRLUT;
1074 execI_(memRLUT, regs_);
1077 void intExecuteBlock(enum blockExecCaller caller) {
1078 psxRegisters *regs_ = &psxRegs;
1079 u8 **memRLUT = psxMemRLUT;
1083 execI_(memRLUT, regs_);
1086 static void intClear(u32 Addr, u32 Size) {
1089 static void intNotify(enum R3000Anote note, void *data) {
1091 case R3000ACPU_NOTIFY_BEFORE_SAVE:
1092 dloadFlush(&psxRegs);
1094 case R3000ACPU_NOTIFY_AFTER_LOAD:
1095 dloadClear(&psxRegs);
1096 setupCop(psxRegs.CP0.n.Status);
1098 case R3000ACPU_NOTIFY_CACHE_ISOLATED: // Armored Core?
1099 memset(&ICache, 0xff, sizeof(ICache));
1101 case R3000ACPU_NOTIFY_CACHE_UNISOLATED:
1106 static void setupCop(u32 sr)
1108 if (sr & (1u << 29))
1109 psxBSC[17] = psxCOP1;
1111 psxBSC[17] = psxCOP1d;
1112 if (sr & (1u << 30))
1113 psxBSC[18] = Config.DisableStalls ? psxCOP2 : psxCOP2_stall;
1115 psxBSC[18] = psxCOP2d;
1116 if (sr & (1u << 31))
1117 psxBSC[19] = psxCOP3;
1119 psxBSC[19] = psxCOP3d;
1122 void intApplyConfig() {
1125 assert(psxBSC[50] == gteLWC2 || psxBSC[50] == gteLWC2_stall);
1126 assert(psxBSC[58] == gteSWC2 || psxBSC[58] == gteSWC2_stall);
1127 assert(psxSPC[16] == psxMFHI || psxSPC[16] == psxMFHI_stall);
1128 assert(psxSPC[18] == psxMFLO || psxSPC[18] == psxMFLO_stall);
1129 assert(psxSPC[24] == psxMULT || psxSPC[24] == psxMULT_stall);
1130 assert(psxSPC[25] == psxMULTU || psxSPC[25] == psxMULTU_stall);
1131 assert(psxSPC[26] == psxDIV || psxSPC[26] == psxDIV_stall);
1132 assert(psxSPC[27] == psxDIVU || psxSPC[27] == psxDIVU_stall);
1134 if (Config.DisableStalls) {
1135 psxBSC[18] = psxCOP2;
1136 psxBSC[50] = gteLWC2;
1137 psxBSC[58] = gteSWC2;
1138 psxSPC[16] = psxMFHI;
1139 psxSPC[18] = psxMFLO;
1140 psxSPC[24] = psxMULT;
1141 psxSPC[25] = psxMULTU;
1142 psxSPC[26] = psxDIV;
1143 psxSPC[27] = psxDIVU;
1145 psxBSC[18] = psxCOP2_stall;
1146 psxBSC[50] = gteLWC2_stall;
1147 psxBSC[58] = gteSWC2_stall;
1148 psxSPC[16] = psxMFHI_stall;
1149 psxSPC[18] = psxMFLO_stall;
1150 psxSPC[24] = psxMULT_stall;
1151 psxSPC[25] = psxMULTU_stall;
1152 psxSPC[26] = psxDIV_stall;
1153 psxSPC[27] = psxDIVU_stall;
1155 setupCop(psxRegs.CP0.n.Status);
1157 // dynarec may occasionally call the interpreter, in such a case the
1158 // cache won't work (cache only works right if all fetches go through it)
1159 if (!Config.icache_emulation || psxCpu != &psxInt)
1160 fetch = fetchNoCache;
1162 fetch = fetchICache;
1164 cycle_mult = Config.cycle_multiplier_override && Config.cycle_multiplier == CYCLE_MULT_DEFAULT
1165 ? Config.cycle_multiplier_override : Config.cycle_multiplier;
1166 psxRegs.subCycleStep = 0x10000 * cycle_mult / 100;
1169 static void intShutdown() {
1172 // single step (may do several ops in case of a branch)
1174 execI_(psxMemRLUT, &psxRegs);
1177 R3000Acpu psxInt = {