psxinterpreter: rework branching in ds
[pcsx_rearmed.git] / libpcsxcore / psxinterpreter.c
1 /***************************************************************************
2  *   Copyright (C) 2007 Ryan Schultz, PCSX-df Team, PCSX team              *
3  *                                                                         *
4  *   This program is free software; you can redistribute it and/or modify  *
5  *   it under the terms of the GNU General Public License as published by  *
6  *   the Free Software Foundation; either version 2 of the License, or     *
7  *   (at your option) any later version.                                   *
8  *                                                                         *
9  *   This program is distributed in the hope that it will be useful,       *
10  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
11  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
12  *   GNU General Public License for more details.                          *
13  *                                                                         *
14  *   You should have received a copy of the GNU General Public License     *
15  *   along with this program; if not, write to the                         *
16  *   Free Software Foundation, Inc.,                                       *
17  *   51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA.           *
18  ***************************************************************************/
19
20 /*
21  * PSX assembly interpreter.
22  */
23
24 #include "psxcommon.h"
25 #include "r3000a.h"
26 #include "gte.h"
27 #include "psxhle.h"
28 #include "psxinterpreter.h"
29 #include <stddef.h>
30 #include <assert.h>
31 #include "../include/compiler_features.h"
32
33 // these may cause issues: because of poor timing we may step
34 // on instructions that real hardware would never reach
// DO_EXCEPTION_ADDR_ERR: doBranchReg() raises an address error exception for
// a misaligned jump target instead of masking off the low two bits.
// NOTE(review): the code guarded by DO_EXCEPTION_RESERVEDI is not in this part
// of the file - presumably it enables reserved-instruction exceptions; confirm.
35 #define DO_EXCEPTION_RESERVEDI
36 #define DO_EXCEPTION_ADDR_ERR
37
// 'branch' is 1 from the moment doBranch()/psxDelayTest() start handling a
// delay slot until the branch has completed; it is handed to psxException()
// as its second argument.
38 static int branch = 0;
// 'branch2' is set together with 'branch' in doBranch().
// NOTE(review): where it is read and cleared is not visible here.
39 static int branch2 = 0;
40
// Calling convention for the opcode handlers: on i386 the first two
// arguments are passed in registers, elsewhere the default is used.
41 #ifdef __i386__
42 #define INT_ATTR __attribute__((regparm(2)))
43 #else
44 #define INT_ATTR
45 #endif
// Value found in a memory lookup table slot that has no backing memory.
46 #ifndef INVALID_PTR
47 #define INVALID_PTR NULL
48 #endif
49
50 // Subsets
// psxBSC is indexed by the primary opcode (code >> 26).
// NOTE(review): psxSPC is presumably indexed by the funct field of SPECIAL
// opcodes - its use is outside this part of the file.
51 static void (INT_ATTR *psxBSC[64])(psxRegisters *regs_, u32 code);
52 static void (INT_ATTR *psxSPC[64])(psxRegisters *regs_, u32 code);
53
54 // get an opcode without triggering exceptions or affecting cache
55 u32 intFakeFetch(u32 pc)
56 {
57         u8 *base = psxMemRLUT[pc >> 16];
58         u32 *code;
59         if (unlikely(base == INVALID_PTR))
60                 return 0; // nop
61         code = (u32 *)(base + (pc & 0xfffc));
62         return SWAP32(*code);
63
64 }
65
66 static u32 INT_ATTR fetchNoCache(psxRegisters *regs, u8 **memRLUT, u32 pc)
67 {
68         u8 *base = memRLUT[pc >> 16];
69         u32 *code;
70         if (unlikely(base == INVALID_PTR)) {
71                 SysPrintf("game crash @%08x, ra=%08x\n", pc, regs->GPR.n.ra);
72                 regs->pc = pc;
73                 psxException(R3000E_IBE << 2, branch, &regs->CP0);
74                 return 0; // execute as nop
75         }
76         code = (u32 *)(base + (pc & 0xfffc));
77         return SWAP32(*code);
78 }
79
80 /*
81 Formula One 2001 :
82 Use old CPU cache code when the RAM location is updated with new code (affects in-game racing)
83 */
// One line of the emulated instruction cache: four consecutive opcodes.
// fetchICache() picks one of the 256 lines with bits 4..11 of the address.
84 static struct cache_entry {
85         u32 tag;       // address of the fetch that last (re)filled this line
86         u32 data[4];   // the line's opcodes, stored after SWAP32()
87 } ICache[256];
88
89 static u32 INT_ATTR fetchICache(psxRegisters *regs, u8 **memRLUT, u32 pc)
90 {
91         // cached?
92         if (pc < 0xa0000000)
93         {
94                 // this is not how the hardware works but whatever
95                 struct cache_entry *entry = &ICache[(pc & 0xff0) >> 4];
96
97                 if (((entry->tag ^ pc) & 0xfffffff0) != 0 || pc < entry->tag)
98                 {
99                         const u8 *base = memRLUT[pc >> 16];
100                         const u32 *code;
101                         if (unlikely(base == INVALID_PTR)) {
102                                 SysPrintf("game crash @%08x, ra=%08x\n", pc, regs->GPR.n.ra);
103                                 regs->pc = pc;
104                                 psxException(R3000E_IBE << 2, branch, &regs->CP0);
105                                 return 0; // execute as nop
106                         }
107                         code = (u32 *)(base + (pc & 0xfff0));
108
109                         entry->tag = pc;
110                         // treat as 4 words, although other configurations are said to be possible
111                         switch (pc & 0x0c)
112                         {
113                                 case 0x00: entry->data[0] = SWAP32(code[0]);
114                                 case 0x04: entry->data[1] = SWAP32(code[1]);
115                                 case 0x08: entry->data[2] = SWAP32(code[2]);
116                                 case 0x0c: entry->data[3] = SWAP32(code[3]);
117                         }
118                 }
119                 return entry->data[(pc & 0x0f) >> 2];
120         }
121
122         return fetchNoCache(regs, memRLUT, pc);
123 }
124
// Opcode fetch routine called by doBranch() and psxDoDelayBranch(); it starts
// out as the uncached fetcher.
// NOTE(review): the code that switches it to fetchICache is not visible here.
125 static u32 (INT_ATTR *fetch)(psxRegisters *regs_, u8 **memRLUT, u32 pc) = fetchNoCache;
126
127 // Make the timing events trigger faster as we are currently assuming everything
128 // takes one cycle, which is not the case on real hardware.
129 // FIXME: count cache misses, memory latencies, stalls to get rid of this
130 static inline void addCycle(void)
131 {
132         assert(psxRegs.subCycleStep >= 0x10000);
133         psxRegs.subCycle += psxRegs.subCycleStep;
134         psxRegs.cycle += psxRegs.subCycle >> 16;
135         psxRegs.subCycle &= 0xffff;
136 }
137
138 static void delayRead(int reg, u32 bpc) {
139         u32 rold, rnew;
140
141 //      SysPrintf("delayRead at %x!\n", psxRegs.pc);
142
143         rold = psxRegs.GPR.r[reg];
144         psxBSC[psxRegs.code >> 26](&psxRegs, psxRegs.code); // branch delay load
145         rnew = psxRegs.GPR.r[reg];
146
147         psxRegs.pc = bpc;
148
149         branch = 0;
150
151         psxRegs.GPR.r[reg] = rold;
152         execI(); // first branch opcode
153         psxRegs.GPR.r[reg] = rnew;
154
155         psxBranchTest();
156 }
157
158 static void delayWrite(int reg, u32 bpc) {
159
160 /*      SysPrintf("delayWrite at %x!\n", psxRegs.pc);
161
162         SysPrintf("%s\n", disR3000AF(psxRegs.code, psxRegs.pc-4));
163         SysPrintf("%s\n", disR3000AF(PSXMu32(bpc), bpc));*/
164
165         // no changes from normal behavior
166
167         psxBSC[psxRegs.code >> 26](&psxRegs, psxRegs.code);
168
169         branch = 0;
170         psxRegs.pc = bpc;
171
172         psxBranchTest();
173 }
174
175 static void delayReadWrite(int reg, u32 bpc) {
176
177 //      SysPrintf("delayReadWrite at %x!\n", psxRegs.pc);
178
179         // the branch delay load is skipped
180
181         branch = 0;
182         psxRegs.pc = bpc;
183
184         psxBranchTest();
185 }
186
/**** R3000A Instruction Macros ****/
// The _fXxx_(code) forms extract a field from any opcode expression (the
// argument is parenthesized, so compound expressions are safe).
// The short _Xxx_ forms decode the local variable 'code', and the _rXxx_
// forms access registers through the local pointer 'regs_'.
#define _PC_            regs_->pc       // The next PC to be executed

#define _fOp_(code)     (((code) >> 26)       )  // The opcode part of the instruction register
#define _fFunct_(code)  (((code)      ) & 0x3F)  // The funct part of the instruction register
#define _fRd_(code)     (((code) >> 11) & 0x1F)  // The rd part of the instruction register
#define _fRt_(code)     (((code) >> 16) & 0x1F)  // The rt part of the instruction register
#define _fRs_(code)     (((code) >> 21) & 0x1F)  // The rs part of the instruction register
#define _fSa_(code)     (((code) >>  6) & 0x1F)  // The sa part of the instruction register
#define _fIm_(code)     ((u16)(code))            // The immediate part of the instruction register
#define _fTarget_(code) ((code) & 0x03ffffff)    // The target part of the instruction register

#define _fImm_(code)    ((s16)(code))            // sign-extended immediate
#define _fImmU_(code)   ((code) & 0xffff)        // zero-extended immediate

#define _Op_     _fOp_(code)
#define _Funct_  _fFunct_(code)
#define _Rd_     _fRd_(code)
#define _Rt_     _fRt_(code)
#define _Rs_     _fRs_(code)
#define _Sa_     _fSa_(code)
#define _Im_     _fIm_(code)
#define _Target_ _fTarget_(code)

#define _Imm_    _fImm_(code)
#define _ImmU_   _fImmU_(code)

#define _rRs_   regs_->GPR.r[_Rs_]   // Rs register
#define _rRt_   regs_->GPR.r[_Rt_]   // Rt register
#define _rRd_   regs_->GPR.r[_Rd_]   // Rd register
#define _rSa_   regs_->GPR.r[_Sa_]   // Sa register
#define _rFs_   regs_->CP0.r[_Rd_]   // Fs register

#define _rHi_   regs_->GPR.n.hi   // The HI register
#define _rLo_   regs_->GPR.n.lo   // The LO register

#define _JumpTarget_    ((_Target_ * 4) + (_PC_ & 0xf0000000))   // Calculates the target during a jump instruction
#define _BranchTarget_  ((s16)_Im_ * 4 + _PC_)                 // Calculates the target during a branch instruction

// Sets the return address in the link register.
// The expansion already ends with ';' - existing callers rely on that form.
#define _SetLink(x)     regs_->GPR.r[x] = _PC_ + 4;

// Declares an opcode handler; the body sees 'regs_' and 'code'.
#define OP(name) \
	static inline INT_ATTR void name(psxRegisters *regs_, u32 code)

// These defines decode the local variable 'tmp' of the next function
// (psxTestLoadDelay) and are to be used there instead of _Funct_ etc.
#define _tFunct_  ((tmp      ) & 0x3F)  // The funct part of the instruction register
#define _tRd_     ((tmp >> 11) & 0x1F)  // The rd part of the instruction register
#define _tRt_     ((tmp >> 16) & 0x1F)  // The rt part of the instruction register
#define _tRs_     ((tmp >> 21) & 0x1F)  // The rs part of the instruction register
#define _tSa_     ((tmp >>  6) & 0x1F)  // The sa part of the instruction register

// Reinterpret a register value as signed / unsigned 32 bit.
#define _i32(x) ((s32)(x))
#define _u32(x) ((u32)(x))
241
242 static int psxTestLoadDelay(int reg, u32 tmp) {
243         if (tmp == 0) return 0; // NOP
244         switch (tmp >> 26) {
245                 case 0x00: // SPECIAL
246                         switch (_tFunct_) {
247                                 case 0x00: // SLL
248                                 case 0x02: case 0x03: // SRL/SRA
249                                         if (_tRd_ == reg && _tRt_ == reg) return 1; else
250                                         if (_tRt_ == reg) return 2; else
251                                         if (_tRd_ == reg) return 3;
252                                         break;
253
254                                 case 0x08: // JR
255                                         if (_tRs_ == reg) return 2;
256                                         break;
257                                 case 0x09: // JALR
258                                         if (_tRd_ == reg && _tRs_ == reg) return 1; else
259                                         if (_tRs_ == reg) return 2; else
260                                         if (_tRd_ == reg) return 3;
261                                         break;
262
263                                 // SYSCALL/BREAK just a break;
264
265                                 case 0x20: case 0x21: case 0x22: case 0x23:
266                                 case 0x24: case 0x25: case 0x26: case 0x27: 
267                                 case 0x2a: case 0x2b: // ADD/ADDU...
268                                 case 0x04: case 0x06: case 0x07: // SLLV...
269                                         if (_tRd_ == reg && (_tRt_ == reg || _tRs_ == reg)) return 1; else
270                                         if (_tRt_ == reg || _tRs_ == reg) return 2; else
271                                         if (_tRd_ == reg) return 3;
272                                         break;
273
274                                 case 0x10: case 0x12: // MFHI/MFLO
275                                         if (_tRd_ == reg) return 3;
276                                         break;
277                                 case 0x11: case 0x13: // MTHI/MTLO
278                                         if (_tRs_ == reg) return 2;
279                                         break;
280
281                                 case 0x18: case 0x19:
282                                 case 0x1a: case 0x1b: // MULT/DIV...
283                                         if (_tRt_ == reg || _tRs_ == reg) return 2;
284                                         break;
285                         }
286                         break;
287
288                 case 0x01: // REGIMM - BLTZ/BGEZ...
289                         // Xenogears - lbu v0 / beq v0
290                         // - no load delay (fixes battle loading)
291                         break;
292
293                 // J would be just a break;
294                 case 0x03: // JAL
295                         if (31 == reg) return 3;
296                         break;
297
298                 case 0x06: case 0x07: // BLEZ/BGTZ
299                 case 0x04: case 0x05: // BEQ/BNE
300                         // Xenogears - lbu v0 / beq v0
301                         // - no load delay (fixes battle loading)
302                         break;
303
304                 case 0x08: case 0x09: case 0x0a: case 0x0b:
305                 case 0x0c: case 0x0d: case 0x0e: // ADDI/ADDIU...
306                         if (_tRt_ == reg && _tRs_ == reg) return 1; else
307                         if (_tRs_ == reg) return 2; else
308                         if (_tRt_ == reg) return 3;
309                         break;
310
311                 case 0x0f: // LUI
312                         if (_tRt_ == reg) return 3;
313                         break;
314
315                 case 0x10: // COP0
316                         switch (_tFunct_) {
317                                 case 0x00: // MFC0
318                                         if (_tRt_ == reg) return 3;
319                                         break;
320                                 case 0x02: // CFC0
321                                         if (_tRt_ == reg) return 3;
322                                         break;
323                                 case 0x04: // MTC0
324                                         if (_tRt_ == reg) return 2;
325                                         break;
326                                 case 0x06: // CTC0
327                                         if (_tRt_ == reg) return 2;
328                                         break;
329                                 // RFE just a break;
330                         }
331                         break;
332
333                 case 0x12: // COP2
334                         switch (_tFunct_) {
335                                 case 0x00: 
336                                         switch (_tRs_) {
337                                                 case 0x00: // MFC2
338                                                         if (_tRt_ == reg) return 3;
339                                                         break;
340                                                 case 0x02: // CFC2
341                                                         if (_tRt_ == reg) return 3;
342                                                         break;
343                                                 case 0x04: // MTC2
344                                                         if (_tRt_ == reg) return 2;
345                                                         break;
346                                                 case 0x06: // CTC2
347                                                         if (_tRt_ == reg) return 2;
348                                                         break;
349                                         }
350                                         break;
351                                 // RTPS... break;
352                         }
353                         break;
354
355                 case 0x22: case 0x26: // LWL/LWR
356                         if (_tRt_ == reg) return 3; else
357                         if (_tRs_ == reg) return 2;
358                         break;
359
360                 case 0x20: case 0x21: case 0x23:
361                 case 0x24: case 0x25: // LB/LH/LW/LBU/LHU
362                         if (_tRt_ == reg && _tRs_ == reg) return 1; else
363                         if (_tRs_ == reg) return 2; else
364                         if (_tRt_ == reg) return 3;
365                         break;
366
367                 case 0x28: case 0x29: case 0x2a:
368                 case 0x2b: case 0x2e: // SB/SH/SWL/SW/SWR
369                         if (_tRt_ == reg || _tRs_ == reg) return 2;
370                         break;
371
372                 case 0x32: case 0x3a: // LWC2/SWC2
373                         if (_tRs_ == reg) return 2;
374                         break;
375         }
376
377         return 0;
378 }
379
380 static void psxDelayTest(int reg, u32 bpc) {
381         u32 tmp = intFakeFetch(bpc);
382         branch = 1;
383
384         switch (psxTestLoadDelay(reg, tmp)) {
385                 case 1:
386                         delayReadWrite(reg, bpc); return;
387                 case 2:
388                         delayRead(reg, bpc); return;
389                 case 3:
390                         delayWrite(reg, bpc); return;
391         }
392         // DS
393         psxBSC[psxRegs.code >> 26](&psxRegs, psxRegs.code);
394
395         branch = 0;
396         psxRegs.pc = bpc;
397
398         psxBranchTest();
399 }
400
// True for opcodes that can change the pc: primary opcodes 1..7
// (REGIMM, J, JAL, BEQ, BNE, BLEZ, BGTZ) and SPECIAL funct 8/9 (JR/JALR).
#define isBranch(c_) \
	((1 <= ((c_) >> 26) && ((c_) >> 26) <= 7) || ((c_) & 0xfc00003e) == 8)
// Exchange two u32 lvalues. Wrapped in do/while so that "swap_(a, b);"
// is a single statement and stays valid inside an unbraced if/else.
#define swap_(a_, b_) do { u32 t_ = (a_); (a_) = (b_); (b_) = t_; } while (0)
404
405 // tar1 is main branch target, 'code' is opcode in DS
406 static u32 psxBranchNoDelay(psxRegisters *regs_, u32 tar1, u32 code, int *taken) {
407         u32 temp, rt;
408
409         assert(isBranch(code));
410         *taken = 1;
411         switch (code >> 26) {
412                 case 0x00: // SPECIAL
413                         switch (_Funct_) {
414                                 case 0x08: // JR
415                                         return _u32(_rRs_);
416                                 case 0x09: // JALR
417                                         temp = _u32(_rRs_);
418                                         if (_Rd_)
419                                                 regs_->GPR.r[_Rd_] = tar1 + 4;
420                                         return temp;
421                         }
422                         break;
423                 case 0x01: // REGIMM
424                         rt = _Rt_;
425                         switch (rt) {
426                                 case 0x10: // BLTZAL
427                                         regs_->GPR.n.ra = tar1 + 4;
428                                         if (_i32(_rRs_) < 0)
429                                                 return tar1 + (s16)_Im_ * 4;
430                                         break;
431                                 case 0x11: // BGEZAL
432                                         regs_->GPR.n.ra = tar1 + 4;
433                                         if (_i32(_rRs_) >= 0)
434                                                 return tar1 + (s16)_Im_ * 4;
435                                         break;
436                                 default:
437                                         if (rt & 1) { // BGEZ
438                                                 if (_i32(_rRs_) >= 0)
439                                                         return tar1 + (s16)_Im_ * 4;
440                                         }
441                                         else {        // BLTZ
442                                                 if (_i32(_rRs_) < 0)
443                                                         return tar1 + (s16)_Im_ * 4;
444                                         }
445                                         break;
446                         }
447                         break;
448                 case 0x02: // J
449                         return (tar1 & 0xf0000000u) + _Target_ * 4;
450                 case 0x03: // JAL
451                         regs_->GPR.n.ra = tar1 + 4;
452                         return (tar1 & 0xf0000000u) + _Target_ * 4;
453                 case 0x04: // BEQ
454                         if (_i32(_rRs_) == _i32(_rRt_))
455                                 return tar1 + (s16)_Im_ * 4;
456                         break;
457                 case 0x05: // BNE
458                         if (_i32(_rRs_) != _i32(_rRt_))
459                                 return tar1 + (s16)_Im_ * 4;
460                         break;
461                 case 0x06: // BLEZ
462                         if (_i32(_rRs_) <= 0)
463                                 return tar1 + (s16)_Im_ * 4;
464                         break;
465                 case 0x07: // BGTZ
466                         if (_i32(_rRs_) > 0)
467                                 return tar1 + (s16)_Im_ * 4;
468                         break;
469         }
470
471         *taken = 0;
472         return tar1;
473 }
474
475 static void psxDoDelayBranch(psxRegisters *regs, u32 tar1, u32 code1) {
476         u32 tar2, code;
477         int taken, lim;
478
479         tar2 = psxBranchNoDelay(regs, tar1, code1, &taken);
480         regs->pc = tar1;
481         if (!taken)
482                 return;
483
484         /*
485          * taken branch in delay slot:
486          * - execute 1 instruction at tar1
487          * - jump to tar2 (target of branch in delay slot; this branch
488          *   has no normal delay slot, instruction at tar1 was fetched instead)
489          */
490         for (lim = 0; lim < 8; lim++) {
491                 regs->code = code = fetch(regs, psxMemRLUT, tar1);
492                 addCycle();
493                 if (likely(!isBranch(code))) {
494                         psxBSC[code >> 26](regs, code);
495                         regs->pc = tar2;
496                         return;
497                 }
498                 tar1 = psxBranchNoDelay(regs, tar2, code, &taken);
499                 regs->pc = tar2;
500                 if (!taken)
501                         return;
502                 swap_(tar1, tar2);
503         }
504         SysPrintf("Evil chained DS branches @ %08x %08x %08x\n", regs->pc, tar1, tar2);
505 }
506
507 static void doBranch(psxRegisters *regs, u32 tar) {
508         u32 tmp, code, pc;
509
510         branch2 = branch = 1;
511
512         // fetch the delay slot
513         pc = regs->pc;
514         regs->pc = pc + 4;
515         regs->code = code = fetch(regs, psxMemRLUT, pc);
516
517         addCycle();
518
519         // check for branch in delay slot
520         if (unlikely(isBranch(code))) {
521                 psxDoDelayBranch(regs, tar, code);
522                 log_unhandled("branch in DS: %08x->%08x\n", pc, regs->pc);
523                 branch = 0;
524                 psxBranchTest();
525                 return;
526         }
527
528         // check for load delay
529         tmp = code >> 26;
530         switch (tmp) {
531                 case 0x10: // COP0
532                         switch (_Rs_) {
533                                 case 0x00: // MFC0
534                                 case 0x02: // CFC0
535                                         psxDelayTest(_Rt_, tar);
536                                         return;
537                         }
538                         break;
539                 case 0x12: // COP2
540                         switch (_Funct_) {
541                                 case 0x00:
542                                         switch (_Rs_) {
543                                                 case 0x00: // MFC2
544                                                 case 0x02: // CFC2
545                                                         psxDelayTest(_Rt_, tar);
546                                                         return;
547                                         }
548                                         break;
549                         }
550                         break;
551                 case 0x32: // LWC2
552                         psxDelayTest(_Rt_, tar);
553                         return;
554                 default:
555                         if (tmp >= 0x20 && tmp <= 0x26) { // LB/LH/LWL/LW/LBU/LHU/LWR
556                                 psxDelayTest(_Rt_, tar);
557                                 return;
558                         }
559                         break;
560         }
561
562         psxBSC[code >> 26](regs, code);
563
564         branch = 0;
565         regs->pc = tar;
566
567         psxBranchTest();
568 }
569
570 static void doBranchReg(psxRegisters *regs, u32 tar) {
571 #ifdef DO_EXCEPTION_ADDR_ERR
572         if (unlikely(tar & 3)) {
573                 psxRegs.pc = psxRegs.CP0.n.BadVAddr = tar;
574                 psxException(R3000E_AdEL << 2, branch, &psxRegs.CP0);
575                 return;
576         }
577 #else
578         tar &= ~3;
579 #endif
580         doBranch(regs, tar);
581 }
582
// add_overflow(a, b, r) / sub_overflow(a, b, r): store a + b (a - b) in the
// lvalue r and evaluate to non-zero when the signed 32 bit operation
// overflowed. Compiler builtins are used when available; the fallback does
// the arithmetic unsigned and derives the overflow from the sign bits.
// (The fallback evaluates a and b more than once.)
#if __has_builtin(__builtin_add_overflow) || (defined(__GNUC__) && __GNUC__ >= 5)
#define add_overflow(a, b, r) __builtin_add_overflow(a, b, &(r))
#define sub_overflow(a, b, r) __builtin_sub_overflow(a, b, &(r))
#else
#define add_overflow(a, b, r) ({(r) = (u32)(a) + (u32)(b); ((a) ^ ~(b)) & ((a) ^ (r)) & (1u<<31);})
#define sub_overflow(a, b, r) ({(r) = (u32)(a) - (u32)(b); ((a) ^  (b)) & ((a) ^ (r)) & (1u<<31);})
#endif
590
591 static void addExc(psxRegisters *regs, u32 rt, s32 a1, s32 a2) {
592         s32 r;
593         if (add_overflow(a1, a2, r)) {
594                 //printf("ov %08x + %08x = %08x\n", a1, a2, r);
595                 regs->pc -= 4;
596                 psxException(R3000E_Ov << 2, branch, &regs->CP0);
597                 return;
598         }
599         if (rt)
600                 regs->GPR.r[rt] = r;
601 }
602
603 static void subExc(psxRegisters *regs, u32 rt, s32 a1, s32 a2) {
604         s32 r;
605         if (sub_overflow(a1, a2, r)) {
606                 regs->pc -= 4;
607                 psxException(R3000E_Ov << 2, branch, &regs->CP0);
608                 return;
609         }
610         if (rt)
611                 regs->GPR.r[rt] = r;
612 }
613
614 /*********************************************************
615 * Arithmetic with immediate operand                      *
616 * Format:  OP rt, rs, immediate                          *
617 *********************************************************/
618 OP(psxADDI)  { addExc(regs_, _Rt_, _i32(_rRs_), _Imm_); } // Rt = Rs + Im (Exception on Integer Overflow)
619 OP(psxADDIU) { if (!_Rt_) return; _rRt_ = _u32(_rRs_) + _Imm_ ; }  // Rt = Rs + Im
620 OP(psxANDI)  { if (!_Rt_) return; _rRt_ = _u32(_rRs_) & _ImmU_; }  // Rt = Rs And Im
621 OP(psxORI)   { if (!_Rt_) return; _rRt_ = _u32(_rRs_) | _ImmU_; }  // Rt = Rs Or  Im
622 OP(psxXORI)  { if (!_Rt_) return; _rRt_ = _u32(_rRs_) ^ _ImmU_; }  // Rt = Rs Xor Im
623 OP(psxSLTI)  { if (!_Rt_) return; _rRt_ = _i32(_rRs_) < _Imm_ ; }  // Rt = Rs < Im              (Signed)
624 OP(psxSLTIU) { if (!_Rt_) return; _rRt_ = _u32(_rRs_) < ((u32)_Imm_); } // Rt = Rs < Im         (Unsigned)
625
626 /*********************************************************
627 * Register arithmetic                                    *
628 * Format:  OP rd, rs, rt                                 *
629 *********************************************************/
630 OP(psxADD)   { addExc(regs_, _Rd_, _i32(_rRs_), _i32(_rRt_)); } // Rd = Rs + Rt (Exception on Integer Overflow)
631 OP(psxSUB)   { subExc(regs_, _Rd_, _i32(_rRs_), _i32(_rRt_)); } // Rd = Rs - Rt (Exception on Integer Overflow)
632 OP(psxADDU)  { if (!_Rd_) return; _rRd_ = _u32(_rRs_) + _u32(_rRt_); }  // Rd = Rs + Rt
633 OP(psxSUBU)  { if (!_Rd_) return; _rRd_ = _u32(_rRs_) - _u32(_rRt_); }  // Rd = Rs - Rt
634 OP(psxAND)   { if (!_Rd_) return; _rRd_ = _u32(_rRs_) & _u32(_rRt_); }  // Rd = Rs And Rt
635 OP(psxOR)    { if (!_Rd_) return; _rRd_ = _u32(_rRs_) | _u32(_rRt_); }  // Rd = Rs Or  Rt
636 OP(psxXOR)   { if (!_Rd_) return; _rRd_ = _u32(_rRs_) ^ _u32(_rRt_); }  // Rd = Rs Xor Rt
637 OP(psxNOR)   { if (!_Rd_) return; _rRd_ =~(_u32(_rRs_) | _u32(_rRt_)); }// Rd = Rs Nor Rt
638 OP(psxSLT)   { if (!_Rd_) return; _rRd_ = _i32(_rRs_) < _i32(_rRt_); }  // Rd = Rs < Rt         (Signed)
639 OP(psxSLTU)  { if (!_Rd_) return; _rRd_ = _u32(_rRs_) < _u32(_rRt_); }  // Rd = Rs < Rt         (Unsigned)
640
641 /*********************************************************
642 * Register mult/div & Register trap logic                *
643 * Format:  OP rs, rt                                     *
644 *********************************************************/
645 OP(psxDIV) {
646         if (!_rRt_) {
647                 _rHi_ = _rRs_;
648                 if (_rRs_ & 0x80000000) {
649                         _rLo_ = 1;
650                 } else {
651                         _rLo_ = 0xFFFFFFFF;
652                 }
653         }
654 #if !defined(__arm__) && !defined(__aarch64__)
655         else if (_rRs_ == 0x80000000 && _rRt_ == 0xFFFFFFFF) {
656                 _rLo_ = 0x80000000;
657                 _rHi_ = 0;
658         }
659 #endif
660         else {
661                 _rLo_ = _i32(_rRs_) / _i32(_rRt_);
662                 _rHi_ = _i32(_rRs_) % _i32(_rRt_);
663         }
664 }
665
666 OP(psxDIV_stall) {
667         regs_->muldivBusyCycle = regs_->cycle + 37;
668         psxDIV(regs_, code);
669 }
670
671 OP(psxDIVU) {
672         if (_rRt_ != 0) {
673                 _rLo_ = _rRs_ / _rRt_;
674                 _rHi_ = _rRs_ % _rRt_;
675         }
676         else {
677                 _rLo_ = 0xffffffff;
678                 _rHi_ = _rRs_;
679         }
680 }
681
682 OP(psxDIVU_stall) {
683         regs_->muldivBusyCycle = regs_->cycle + 37;
684         psxDIVU(regs_, code);
685 }
686
687 OP(psxMULT) {
688         u64 res = (s64)_i32(_rRs_) * _i32(_rRt_);
689
690         regs_->GPR.n.lo = (u32)res;
691         regs_->GPR.n.hi = (u32)(res >> 32);
692 }
693
694 OP(psxMULT_stall) {
695         // approximate, but maybe good enough
696         u32 rs = _rRs_;
697         u32 lz = __builtin_clz(((rs ^ ((s32)rs >> 21)) | 1));
698         u32 c = 7 + (2 - (lz / 11)) * 4;
699         regs_->muldivBusyCycle = regs_->cycle + c;
700         psxMULT(regs_, code);
701 }
702
703 OP(psxMULTU) {
704         u64 res = (u64)_u32(_rRs_) * _u32(_rRt_);
705
706         regs_->GPR.n.lo = (u32)(res & 0xffffffff);
707         regs_->GPR.n.hi = (u32)((res >> 32) & 0xffffffff);
708 }
709
710 OP(psxMULTU_stall) {
711         // approximate, but maybe good enough
712         u32 lz = __builtin_clz(_rRs_ | 1);
713         u32 c = 7 + (2 - (lz / 11)) * 4;
714         regs_->muldivBusyCycle = regs_->cycle + c;
715         psxMULTU(regs_, code);
716 }
717
718 /*********************************************************
719 * Register branch logic                                  *
720 * Format:  OP rs, offset                                 *
721 *********************************************************/
722 #define RepZBranchi32(op) \
723         if(_i32(_rRs_) op 0) \
724                 doBranch(regs_, _BranchTarget_);
725 #define RepZBranchLinki32(op)  { \
726         s32 temp = _i32(_rRs_); \
727         _SetLink(31); \
728         if(temp op 0) \
729                 doBranch(regs_, _BranchTarget_); \
730 }
731
732 OP(psxBGEZ)   { RepZBranchi32(>=) }      // Branch if Rs >= 0
733 OP(psxBGEZAL) { RepZBranchLinki32(>=) }  // Branch if Rs >= 0 and link
734 OP(psxBGTZ)   { RepZBranchi32(>) }       // Branch if Rs >  0
735 OP(psxBLEZ)   { RepZBranchi32(<=) }      // Branch if Rs <= 0
736 OP(psxBLTZ)   { RepZBranchi32(<) }       // Branch if Rs <  0
737 OP(psxBLTZAL) { RepZBranchLinki32(<) }   // Branch if Rs <  0 and link
738
739 /*********************************************************
740 * Shift arithmetic with constant shift                   *
741 * Format:  OP rd, rt, sa                                 *
742 *********************************************************/
743 OP(psxSLL) { if (!_Rd_) return; _rRd_ = _u32(_rRt_) << _Sa_; } // Rd = Rt << sa
744 OP(psxSRA) { if (!_Rd_) return; _rRd_ = _i32(_rRt_) >> _Sa_; } // Rd = Rt >> sa (arithmetic)
745 OP(psxSRL) { if (!_Rd_) return; _rRd_ = _u32(_rRt_) >> _Sa_; } // Rd = Rt >> sa (logical)
746
747 /*********************************************************
748 * Shift arithmetic with variant register shift           *
749 * Format:  OP rd, rt, rs                                 *
750 *********************************************************/
751 OP(psxSLLV) { if (!_Rd_) return; _rRd_ = _u32(_rRt_) << (_u32(_rRs_) & 0x1F); } // Rd = Rt << rs
752 OP(psxSRAV) { if (!_Rd_) return; _rRd_ = _i32(_rRt_) >> (_u32(_rRs_) & 0x1F); } // Rd = Rt >> rs (arithmetic)
753 OP(psxSRLV) { if (!_Rd_) return; _rRd_ = _u32(_rRt_) >> (_u32(_rRs_) & 0x1F); } // Rd = Rt >> rs (logical)
754
755 /*********************************************************
756 * Load higher 16 bits of the first word in GPR with imm  *
757 * Format:  OP rt, immediate                              *
758 *********************************************************/
759 OP(psxLUI) { if (!_Rt_) return; _rRt_ = code << 16; } // Upper halfword of Rt = Im
760
761 /*********************************************************
762 * Move from HI/LO to GPR                                 *
763 * Format:  OP rd                                         *
764 *********************************************************/
765 OP(psxMFHI) { if (!_Rd_) return; _rRd_ = _rHi_; } // Rd = Hi
766 OP(psxMFLO) { if (!_Rd_) return; _rRd_ = _rLo_; } // Rd = Lo
767
768 static void mflohiCheckStall(psxRegisters *regs_)
769 {
770         u32 left = regs_->muldivBusyCycle - regs_->cycle;
771         if (left <= 37) {
772                 //printf("muldiv stall %u\n", left);
773                 regs_->cycle = regs_->muldivBusyCycle;
774         }
775 }
776
777 OP(psxMFHI_stall) { mflohiCheckStall(regs_); psxMFHI(regs_, code); }
778 OP(psxMFLO_stall) { mflohiCheckStall(regs_); psxMFLO(regs_, code); }
779
/*********************************************************
* Move from GPR to HI/LO                                 *
* Format:  OP rs                                         *
*********************************************************/
784 OP(psxMTHI) { _rHi_ = _rRs_; } // Hi = Rs
785 OP(psxMTLO) { _rLo_ = _rRs_; } // Lo = Rs
786
787 /*********************************************************
788 * Special purpose instructions                           *
789 * Format:  OP                                            *
790 *********************************************************/
791 OP(psxBREAK) {
792         regs_->pc -= 4;
793         psxException(R3000E_Bp << 2, branch, &regs_->CP0);
794 }
795
796 OP(psxSYSCALL) {
797         regs_->pc -= 4;
798         psxException(R3000E_Syscall << 2, branch, &regs_->CP0);
799 }
800
801 static inline void execI_(u8 **memRLUT, psxRegisters *regs_);
802
803 static inline void psxTestSWInts(psxRegisters *regs_, int step) {
804         if (regs_->CP0.n.Cause & regs_->CP0.n.Status & 0x0300 &&
805            regs_->CP0.n.Status & 0x1) {
806                 if (step)
807                         execI_(psxMemRLUT, regs_);
808                 regs_->CP0.n.Cause &= ~0x7c;
809                 psxException(regs_->CP0.n.Cause, branch, &regs_->CP0);
810         }
811 }
812
813 OP(psxRFE) {
814 //      SysPrintf("psxRFE\n");
815         regs_->CP0.n.Status = (regs_->CP0.n.Status & 0xfffffff0) |
816                               ((regs_->CP0.n.Status & 0x3c) >> 2);
817         psxTestSWInts(regs_, 0);
818 }
819
820 /*********************************************************
821 * Register branch logic                                  *
822 * Format:  OP rs, rt, offset                             *
823 *********************************************************/
824 #define RepBranchi32(op) { \
825         if (_i32(_rRs_) op _i32(_rRt_)) \
826                 doBranch(regs_, _BranchTarget_); \
827 }
828
829 OP(psxBEQ) { RepBranchi32(==) }  // Branch if Rs == Rt
830 OP(psxBNE) { RepBranchi32(!=) }  // Branch if Rs != Rt
831
832 /*********************************************************
833 * Jump to target                                         *
834 * Format:  OP target                                     *
835 *********************************************************/
836 OP(psxJ)   {               doBranch(regs_, _JumpTarget_); }
837 OP(psxJAL) { _SetLink(31); doBranch(regs_, _JumpTarget_); }
838
839 /*********************************************************
840 * Register jump                                          *
841 * Format:  OP rs, rd                                     *
842 *********************************************************/
843 OP(psxJR) {
844         doBranchReg(regs_, _rRs_);
845         psxJumpTest();
846 }
847
848 OP(psxJALR) {
849         u32 temp = _u32(_rRs_);
850         if (_Rd_) { _SetLink(_Rd_); }
851         doBranchReg(regs_, temp);
852 }
853
854 /*********************************************************
855 * Load and store for GPR                                 *
856 * Format:  OP rt, offset(base)                           *
857 *********************************************************/
858
859 #define _oB_ (regs_->GPR.r[_Rs_] + _Imm_)
860
861 OP(psxLB)  { u32 v =  (s8)psxMemRead8(_oB_);  if (_Rt_) _rRt_ = v; }
862 OP(psxLBU) { u32 v =      psxMemRead8(_oB_);  if (_Rt_) _rRt_ = v; }
863 OP(psxLH)  { u32 v = (s16)psxMemRead16(_oB_); if (_Rt_) _rRt_ = v; }
864 OP(psxLHU) { u32 v =      psxMemRead16(_oB_); if (_Rt_) _rRt_ = v; }
865 OP(psxLW)  { u32 v =      psxMemRead32(_oB_); if (_Rt_) _rRt_ = v; }
866
867 OP(psxLWL) {
868         static const u32 LWL_MASK[4] = { 0xffffff, 0xffff, 0xff, 0 };
869         static const u32 LWL_SHIFT[4] = { 24, 16, 8, 0 };
870         u32 addr = _oB_;
871         u32 shift = addr & 3;
872         u32 mem = psxMemRead32(addr & ~3);
873
874         if (!_Rt_) return;
875         _rRt_ = (_u32(_rRt_) & LWL_MASK[shift]) | (mem << LWL_SHIFT[shift]);
876
877         /*
878         Mem = 1234.  Reg = abcd
879
880         0   4bcd   (mem << 24) | (reg & 0x00ffffff)
881         1   34cd   (mem << 16) | (reg & 0x0000ffff)
882         2   234d   (mem <<  8) | (reg & 0x000000ff)
883         3   1234   (mem      ) | (reg & 0x00000000)
884         */
885 }
886
887 OP(psxLWR) {
888         static const u32 LWR_MASK[4] = { 0, 0xff000000, 0xffff0000, 0xffffff00 };
889         static const u32 LWR_SHIFT[4] = { 0, 8, 16, 24 };
890         u32 addr = _oB_;
891         u32 shift = addr & 3;
892         u32 mem = psxMemRead32(addr & ~3);
893
894         if (!_Rt_) return;
895         _rRt_ = (_u32(_rRt_) & LWR_MASK[shift]) | (mem >> LWR_SHIFT[shift]);
896
897         /*
898         Mem = 1234.  Reg = abcd
899
900         0   1234   (mem      ) | (reg & 0x00000000)
901         1   a123   (mem >>  8) | (reg & 0xff000000)
902         2   ab12   (mem >> 16) | (reg & 0xffff0000)
903         3   abc1   (mem >> 24) | (reg & 0xffffff00)
904         */
905 }
906
907 OP(psxSB) { psxMemWrite8 (_oB_, _rRt_ &   0xff); }
908 OP(psxSH) { psxMemWrite16(_oB_, _rRt_ & 0xffff); }
909 OP(psxSW) { psxMemWrite32(_oB_, _rRt_); }
910
911 OP(psxSWL) {
912         static const u32 SWL_MASK[4] = { 0xffffff00, 0xffff0000, 0xff000000, 0 };
913         static const u32 SWL_SHIFT[4] = { 24, 16, 8, 0 };
914         u32 addr = _oB_;
915         u32 shift = addr & 3;
916         u32 mem = psxMemRead32(addr & ~3);
917
918         psxMemWrite32(addr & ~3,  (_u32(_rRt_) >> SWL_SHIFT[shift]) |
919                              (  mem & SWL_MASK[shift]) );
920         /*
921         Mem = 1234.  Reg = abcd
922
923         0   123a   (reg >> 24) | (mem & 0xffffff00)
924         1   12ab   (reg >> 16) | (mem & 0xffff0000)
925         2   1abc   (reg >>  8) | (mem & 0xff000000)
926         3   abcd   (reg      ) | (mem & 0x00000000)
927         */
928 }
929
930 OP(psxSWR) {
931         static const u32 SWR_MASK[4] = { 0, 0xff, 0xffff, 0xffffff };
932         static const u32 SWR_SHIFT[4] = { 0, 8, 16, 24 };
933         u32 addr = _oB_;
934         u32 shift = addr & 3;
935         u32 mem = psxMemRead32(addr & ~3);
936
937         psxMemWrite32(addr & ~3,  (_u32(_rRt_) << SWR_SHIFT[shift]) |
938                              (  mem & SWR_MASK[shift]) );
939
940         /*
941         Mem = 1234.  Reg = abcd
942
943         0   abcd   (reg      ) | (mem & 0x00000000)
944         1   bcd4   (reg <<  8) | (mem & 0x000000ff)
945         2   cd34   (reg << 16) | (mem & 0x0000ffff)
946         3   d234   (reg << 24) | (mem & 0x00ffffff)
947         */
948 }
949
950 /*********************************************************
951 * Moves between GPR and COPx                             *
952 * Format:  OP rt, fs                                     *
953 *********************************************************/
954 OP(psxMFC0) {
955         u32 r = _Rd_;
956 #ifdef DO_EXCEPTION_RESERVEDI
957         if (unlikely(r == 0)) {
958                 regs_->pc -= 4;
959                 psxException(R3000E_RI << 2, branch, &regs_->CP0);
960         }
961 #endif
962         if (_Rt_)
963                 _rRt_ = regs_->CP0.r[r];
964 }
965
966 OP(psxCFC0) { if (!_Rt_) return; _rRt_ = _rFs_; }
967
968 static void setupCop(u32 sr);
969
970 void MTC0(psxRegisters *regs_, int reg, u32 val) {
971 //      SysPrintf("MTC0 %d: %x\n", reg, val);
972         switch (reg) {
973                 case 12: // Status
974                         if (unlikely((regs_->CP0.n.Status ^ val) & (1 << 16)))
975                                 psxMemOnIsolate((val >> 16) & 1);
976                         if (unlikely((regs_->CP0.n.Status ^ val) & (7 << 29)))
977                                 setupCop(val);
978                         regs_->CP0.n.Status = val;
979                         psxTestSWInts(regs_, 1);
980                         break;
981
982                 case 13: // Cause
983                         regs_->CP0.n.Cause &= ~0x0300;
984                         regs_->CP0.n.Cause |= val & 0x0300;
985                         psxTestSWInts(regs_, 0);
986                         break;
987
988                 default:
989                         regs_->CP0.r[reg] = val;
990                         break;
991         }
992 }
993
994 OP(psxMTC0) { MTC0(regs_, _Rd_, _u32(_rRt_)); }
995 OP(psxCTC0) { MTC0(regs_, _Rd_, _u32(_rRt_)); }
996
997 /*********************************************************
998 * Unknown instruction (would generate an exception)      *
999 * Format:  ?                                             *
1000 *********************************************************/
// Shared hook for unknown opcodes; does nothing unless the trace below is
// enabled by hand.
static inline void psxNULL_(void)
{
	//printf("op %08x @%08x\n", psxRegs.code, psxRegs.pc);
}
1004
// Handler for undefined opcodes. With DO_EXCEPTION_RESERVEDI it raises a
// reserved-instruction exception for the current instruction.
OP(psxNULL) {
	psxNULL_();
#ifdef DO_EXCEPTION_RESERVEDI
	{
		const u32 cause = R3000E_RI << 2;

		// execI_ has already advanced pc; point it back at this instruction
		regs_->pc -= 4;
		psxException(cause, branch, &regs_->CP0);
	}
#endif
}
1012
// Placeholder for unassigned slots of the psxCP2 table; the register
// pointer is not used.
void gteNULL(struct psxCP2Regs *regs) {
	(void)regs;
	psxNULL_();
}
1016
1017 OP(psxSPECIAL) {
1018         psxSPC[_Funct_](regs_, code);
1019 }
1020
1021 OP(psxCOP0) {
1022         switch (_Rs_) {
1023                 case 0x00: psxMFC0(regs_, code); break;
1024                 case 0x02: psxCFC0(regs_, code); break;
1025                 case 0x04: psxMTC0(regs_, code); break;
1026                 case 0x06: psxCTC0(regs_, code); break;
1027                 case 0x10: psxRFE(regs_, code);  break;
1028                 default:   psxNULL_();           break;
1029         }
1030 }
1031
1032 OP(psxLWC0) {
1033         // MTC0(regs_, _Rt_, psxMemRead32(_oB_)); // ?
1034         log_unhandled("LWC0 %08x\n", code);
1035 }
1036
// Installed by setupCop for opcode 17 while Status bit 29 is set.
// Deliberately a no-op: the real hardware behaviour is unknown.
OP(psxCOP1) {
	// ??? what actually happens here?
}
1040
// Installed by setupCop for opcode 17 while Status bit 29 is clear (psxBSC
// also uses it for other slots). With DO_EXCEPTION_RESERVEDI it raises a
// reserved-instruction exception with the value 1 placed at bit 28 of the
// code.
OP(psxCOP1d) {
#ifdef DO_EXCEPTION_RESERVEDI
	const u32 cause = (1<<28) | (R3000E_RI << 2);

	// execI_ has already advanced pc; point it back at this instruction
	regs_->pc -= 4;
	psxException(cause, branch, &regs_->CP0);
#endif
}
1047
1048 OP(psxCOP2) {
1049         psxCP2[_Funct_](&regs_->CP2);
1050 }
1051
1052 OP(psxCOP2_stall) {
1053         u32 f = _Funct_;
1054         gteCheckStall(f);
1055         psxCP2[f](&regs_->CP2);
1056 }
1057
// Installed by setupCop for opcode 18 while Status bit 30 is clear (psxBSC
// also uses it for other slots). With DO_EXCEPTION_RESERVEDI it raises a
// reserved-instruction exception with the value 2 placed at bit 28 of the
// code.
OP(psxCOP2d) {
#ifdef DO_EXCEPTION_RESERVEDI
	const u32 cause = (2<<28) | (R3000E_RI << 2);

	// execI_ has already advanced pc; point it back at this instruction
	regs_->pc -= 4;
	psxException(cause, branch, &regs_->CP0);
#endif
}
1064
1065 OP(gteMFC2) {
1066         if (!_Rt_) return;
1067         regs_->GPR.r[_Rt_] = MFC2(&regs_->CP2, _Rd_);
1068 }
1069
1070 OP(gteCFC2) {
1071         if (!_Rt_) return;
1072         regs_->GPR.r[_Rt_] = regs_->CP2C.r[_Rd_];
1073 }
1074
1075 OP(gteMTC2) {
1076         MTC2(&regs_->CP2, regs_->GPR.r[_Rt_], _Rd_);
1077 }
1078
1079 OP(gteCTC2) {
1080         CTC2(&regs_->CP2, regs_->GPR.r[_Rt_], _Rd_);
1081 }
1082
1083 OP(gteLWC2) {
1084         MTC2(&regs_->CP2, psxMemRead32(_oB_), _Rt_);
1085 }
1086
1087 OP(gteSWC2) {
1088         psxMemWrite32(_oB_, MFC2(&regs_->CP2, _Rt_));
1089 }
1090
1091 OP(gteLWC2_stall) {
1092         gteCheckStall(0);
1093         gteLWC2(regs_, code);
1094 }
1095
1096 OP(gteSWC2_stall) {
1097         gteCheckStall(0);
1098         gteSWC2(regs_, code);
1099 }
1100
// Installed by setupCop for opcode 19 while Status bit 31 is set.
// Deliberately a no-op: the real hardware behaviour is unknown.
OP(psxCOP3) {
	// ??? what actually happens here?
}
1104
// Installed by setupCop for opcode 19 while Status bit 31 is clear (psxBSC
// also uses it for other slots). With DO_EXCEPTION_RESERVEDI it raises a
// reserved-instruction exception with the value 3 placed at bit 28 of the
// code.
OP(psxCOP3d) {
#ifdef DO_EXCEPTION_RESERVEDI
	const u32 cause = (3<<28) | (R3000E_RI << 2);

	// execI_ has already advanced pc; point it back at this instruction
	regs_->pc -= 4;
	psxException(cause, branch, &regs_->CP0);
#endif
}
1111
1112 OP(psxLWCx) {
1113         // does this read memory?
1114         log_unhandled("LWCx %08x\n", code);
1115 }
1116
1117 OP(psxSWCx) {
1118         // does this write something to memory?
1119         log_unhandled("SWCx %08x\n", code);
1120 }
1121
1122 static void psxBASIC(struct psxCP2Regs *cp2regs) {
1123         psxRegisters *regs_ = (void *)((char *)cp2regs - offsetof(psxRegisters, CP2));
1124         u32 code = regs_->code;
1125         assert(regs_ == &psxRegs);
1126         switch (_Rs_) {
1127                 case 0x00: gteMFC2(regs_, code); break;
1128                 case 0x02: gteCFC2(regs_, code); break;
1129                 case 0x04: gteMTC2(regs_, code); break;
1130                 case 0x06: gteCTC2(regs_, code); break;
1131                 default:   psxNULL_();           break;
1132         }
1133 }
1134
1135 OP(psxREGIMM) {
1136         u32 rt = _Rt_;
1137         switch (rt) {
1138                 case 0x10: psxBLTZAL(regs_, code); break;
1139                 case 0x11: psxBGEZAL(regs_, code); break;
1140                 default:
1141                         if (rt & 1)
1142                                 psxBGEZ(regs_, code);
1143                         else
1144                                 psxBLTZ(regs_, code);
1145         }
1146 }
1147
1148 OP(psxHLE) {
1149         u32 hleCode;
1150         if (unlikely(!Config.HLE)) {
1151                 psxSWCx(regs_, code);
1152                 return;
1153         }
1154         hleCode = code & 0x03ffffff;
1155         if (hleCode >= (sizeof(psxHLEt) / sizeof(psxHLEt[0]))) {
1156                 psxSWCx(regs_, code);
1157                 return;
1158         }
1159         psxHLEt[hleCode]();
1160 }
1161
1162 static void (INT_ATTR *psxBSC[64])(psxRegisters *regs_, u32 code) = {
1163         psxSPECIAL, psxREGIMM, psxJ   , psxJAL  , psxBEQ , psxBNE , psxBLEZ, psxBGTZ,
1164         psxADDI   , psxADDIU , psxSLTI, psxSLTIU, psxANDI, psxORI , psxXORI, psxLUI ,
1165         psxCOP0   , psxCOP1d , psxCOP2, psxCOP3d, psxNULL, psxCOP1d,psxCOP2d,psxCOP3d,
1166         psxNULL   , psxCOP1d , psxCOP2d,psxCOP3d, psxNULL, psxCOP1d,psxCOP2d,psxCOP3d,
1167         psxLB     , psxLH    , psxLWL , psxLW   , psxLBU , psxLHU , psxLWR , psxCOP3d,
1168         psxSB     , psxSH    , psxSWL , psxSW   , psxNULL, psxCOP1d,psxSWR , psxCOP3d,
1169         psxLWC0   , psxLWCx  , gteLWC2, psxLWCx , psxNULL, psxCOP1d,psxCOP2d,psxCOP3d,
1170         psxSWCx   , psxSWCx  , gteSWC2, psxHLE  , psxNULL, psxCOP1d,psxCOP2d,psxCOP3d,
1171 };
1172
1173 static void (INT_ATTR *psxSPC[64])(psxRegisters *regs_, u32 code) = {
1174         psxSLL , psxNULL , psxSRL , psxSRA , psxSLLV   , psxNULL , psxSRLV, psxSRAV,
1175         psxJR  , psxJALR , psxNULL, psxNULL, psxSYSCALL, psxBREAK, psxNULL, psxNULL,
1176         psxMFHI, psxMTHI , psxMFLO, psxMTLO, psxNULL   , psxNULL , psxNULL, psxNULL,
1177         psxMULT, psxMULTU, psxDIV , psxDIVU, psxNULL   , psxNULL , psxNULL, psxNULL,
1178         psxADD , psxADDU , psxSUB , psxSUBU, psxAND    , psxOR   , psxXOR , psxNOR ,
1179         psxNULL, psxNULL , psxSLT , psxSLTU, psxNULL   , psxNULL , psxNULL, psxNULL,
1180         psxNULL, psxNULL , psxNULL, psxNULL, psxNULL   , psxNULL , psxNULL, psxNULL,
1181         psxNULL, psxNULL , psxNULL, psxNULL, psxNULL   , psxNULL , psxNULL, psxNULL
1182 };
1183
1184 void (*psxCP2[64])(struct psxCP2Regs *regs) = {
1185         psxBASIC, gteRTPS , gteNULL , gteNULL, gteNULL, gteNULL , gteNCLIP, gteNULL, // 00
1186         gteNULL , gteNULL , gteNULL , gteNULL, gteOP  , gteNULL , gteNULL , gteNULL, // 08
1187         gteDPCS , gteINTPL, gteMVMVA, gteNCDS, gteCDP , gteNULL , gteNCDT , gteNULL, // 10
1188         gteNULL , gteNULL , gteNULL , gteNCCS, gteCC  , gteNULL , gteNCS  , gteNULL, // 18
1189         gteNCT  , gteNULL , gteNULL , gteNULL, gteNULL, gteNULL , gteNULL , gteNULL, // 20
1190         gteSQR  , gteDCPL , gteDPCT , gteNULL, gteNULL, gteAVSZ3, gteAVSZ4, gteNULL, // 28
1191         gteRTPT , gteNULL , gteNULL , gteNULL, gteNULL, gteNULL , gteNULL , gteNULL, // 30
1192         gteNULL , gteNULL , gteNULL , gteNULL, gteNULL, gteGPF  , gteGPL  , gteNCCT  // 38
1193 };
1194
1195 ///////////////////////////////////////////
1196
// Interpreter init hook: nothing to set up, always returns 0.
static int intInit()
{
	return 0;
}
1200
// Interpreter reset hook: intentionally empty.
static void intReset()
{
}
1203
1204 static inline void execI_(u8 **memRLUT, psxRegisters *regs_) {
1205         u32 pc = regs_->pc;
1206         regs_->pc += 4;
1207         regs_->code = fetch(regs_, memRLUT, pc);
1208
1209         addCycle();
1210
1211         psxBSC[regs_->code >> 26](regs_, regs_->code);
1212 }
1213
1214 static void intExecute() {
1215         psxRegisters *regs_ = &psxRegs;
1216         u8 **memRLUT = psxMemRLUT;
1217         extern int stop;
1218
1219         while (!stop)
1220                 execI_(memRLUT, regs_);
1221 }
1222
1223 void intExecuteBlock(enum blockExecCaller caller) {
1224         psxRegisters *regs_ = &psxRegs;
1225         u8 **memRLUT = psxMemRLUT;
1226
1227         branch2 = 0;
1228         while (!branch2)
1229                 execI_(memRLUT, regs_);
1230 }
1231
1232 static void intClear(u32 Addr, u32 Size) {
1233 }
1234
1235 static void intNotify(enum R3000Anote note, void *data) {
1236         switch (note) {
1237         case R3000ACPU_NOTIFY_AFTER_LOAD:
1238                 setupCop(psxRegs.CP0.n.Status);
1239                 // fallthrough
1240         case R3000ACPU_NOTIFY_CACHE_ISOLATED: // Armored Core?
1241                 memset(&ICache, 0xff, sizeof(ICache));
1242                 break;
1243         case R3000ACPU_NOTIFY_CACHE_UNISOLATED:
1244         case R3000ACPU_NOTIFY_BEFORE_SAVE:
1245                 break;
1246         }
1247 }
1248
1249 static void setupCop(u32 sr)
1250 {
1251         if (sr & (1u << 29))
1252                 psxBSC[17] = psxCOP1;
1253         else
1254                 psxBSC[17] = psxCOP1d;
1255         if (sr & (1u << 30))
1256                 psxBSC[18] = Config.DisableStalls ? psxCOP2 : psxCOP2_stall;
1257         else
1258                 psxBSC[18] = psxCOP2d;
1259         if (sr & (1u << 31))
1260                 psxBSC[19] = psxCOP3;
1261         else
1262                 psxBSC[19] = psxCOP3d;
1263 }
1264
1265 void intApplyConfig() {
1266         int cycle_mult;
1267
1268         assert(psxBSC[50] == gteLWC2  || psxBSC[50] == gteLWC2_stall);
1269         assert(psxBSC[58] == gteSWC2  || psxBSC[58] == gteSWC2_stall);
1270         assert(psxSPC[16] == psxMFHI  || psxSPC[16] == psxMFHI_stall);
1271         assert(psxSPC[18] == psxMFLO  || psxSPC[18] == psxMFLO_stall);
1272         assert(psxSPC[24] == psxMULT  || psxSPC[24] == psxMULT_stall);
1273         assert(psxSPC[25] == psxMULTU || psxSPC[25] == psxMULTU_stall);
1274         assert(psxSPC[26] == psxDIV   || psxSPC[26] == psxDIV_stall);
1275         assert(psxSPC[27] == psxDIVU  || psxSPC[27] == psxDIVU_stall);
1276
1277         if (Config.DisableStalls) {
1278                 psxBSC[18] = psxCOP2;
1279                 psxBSC[50] = gteLWC2;
1280                 psxBSC[58] = gteSWC2;
1281                 psxSPC[16] = psxMFHI;
1282                 psxSPC[18] = psxMFLO;
1283                 psxSPC[24] = psxMULT;
1284                 psxSPC[25] = psxMULTU;
1285                 psxSPC[26] = psxDIV;
1286                 psxSPC[27] = psxDIVU;
1287         } else {
1288                 psxBSC[18] = psxCOP2_stall;
1289                 psxBSC[50] = gteLWC2_stall;
1290                 psxBSC[58] = gteSWC2_stall;
1291                 psxSPC[16] = psxMFHI_stall;
1292                 psxSPC[18] = psxMFLO_stall;
1293                 psxSPC[24] = psxMULT_stall;
1294                 psxSPC[25] = psxMULTU_stall;
1295                 psxSPC[26] = psxDIV_stall;
1296                 psxSPC[27] = psxDIVU_stall;
1297         }
1298         setupCop(psxRegs.CP0.n.Status);
1299
1300         // dynarec may occasionally call the interpreter, in such a case the
1301         // cache won't work (cache only works right if all fetches go through it)
1302         if (!Config.icache_emulation || psxCpu != &psxInt)
1303                 fetch = fetchNoCache;
1304         else
1305                 fetch = fetchICache;
1306
1307         cycle_mult = Config.cycle_multiplier_override && Config.cycle_multiplier == CYCLE_MULT_DEFAULT
1308                 ? Config.cycle_multiplier_override : Config.cycle_multiplier;
1309         psxRegs.subCycleStep = 0x10000 * cycle_mult / 100;
1310 }
1311
// Interpreter shutdown hook: intentionally empty.
static void intShutdown()
{
}
1314
1315 // single step (may do several ops in case of a branch)
1316 void execI() {
1317         execI_(psxMemRLUT, &psxRegs);
1318 }
1319
1320 R3000Acpu psxInt = {
1321         intInit,
1322         intReset,
1323         intExecute,
1324         intExecuteBlock,
1325         intClear,
1326         intNotify,
1327         intApplyConfig,
1328         intShutdown
1329 };