make icache implementation play nice with the dynarec
[pcsx_rearmed.git] / libpcsxcore / psxinterpreter.c
1 /***************************************************************************
2  *   Copyright (C) 2007 Ryan Schultz, PCSX-df Team, PCSX team              *
3  *                                                                         *
4  *   This program is free software; you can redistribute it and/or modify  *
5  *   it under the terms of the GNU General Public License as published by  *
6  *   the Free Software Foundation; either version 2 of the License, or     *
7  *   (at your option) any later version.                                   *
8  *                                                                         *
9  *   This program is distributed in the hope that it will be useful,       *
10  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
11  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
12  *   GNU General Public License for more details.                          *
13  *                                                                         *
14  *   You should have received a copy of the GNU General Public License     *
15  *   along with this program; if not, write to the                         *
16  *   Free Software Foundation, Inc.,                                       *
17  *   51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA.           *
18  ***************************************************************************/
19
20 /*
21  * PSX assembly interpreter.
22  */
23
24 #include "psxcommon.h"
25 #include "r3000a.h"
26 #include "gte.h"
27 #include "psxhle.h"
28 #include "debug.h"
29 #include "psxinterpreter.h"
30 #include <assert.h>
31
// Interpreter branch bookkeeping, private to this file.
// branch: set to 1 by doBranch()/psxDelayTest() while a delay slot is being
// handled, reset to 0 before psxBranchTest(); also passed to psxException().
32 static int branch = 0;
// branch2: set to 1 by doBranch(); not read in the visible part of this file.
33 static int branch2 = 0;
// branchPC: target of the branch currently handled by doBranch().
34 static u32 branchPC;
35
36 // These macros are used to assemble the repassembler functions
37
// debugI(): with PSXCPU_LOG defined, logs the disassembly of psxRegs.code at
// psxRegs.pc; otherwise it expands to nothing.
38 #ifdef PSXCPU_LOG
39 #define debugI() PSXCPU_LOG("%s\n", disR3000AF(psxRegs.code, psxRegs.pc)); 
40 #else
41 #define debugI()
42 #endif
43
44 // Subsets
// Opcode handler tables. In this file psxBSC is indexed by the primary
// opcode (psxRegs.code >> 26). The other tables are not indexed in the
// visible part of the file; presumably they are keyed by the funct/rt/rs
// fields - confirm where the tables are filled in.
45 void (*psxBSC[64])();
46 void (*psxSPC[64])();
47 void (*psxREG[32])();
48 void (*psxCP0[32])();
49 void (*psxCP2[64])(struct psxCP2Regs *regs);
50 void (*psxCP2BSC[32])();
51
52 static u32 fetchNoCache(u32 pc)
53 {
54         u32 *code = (u32 *)PSXM(pc);
55         return ((code == NULL) ? 0 : SWAP32(*code));
56 }
57
58 /*
59 Formula One 2001 :
60 Use old CPU cache code when the RAM location is updated with new code (affects in-game racing)
61 */
62 static u8* ICache_Addr;
63 static u8* ICache_Code;
64 static u32 fetchICache(u32 pc)
65 {
66         uint32_t pc_bank, pc_offset, pc_cache;
67         uint8_t *IAddr, *ICode;
68
69         pc_bank = pc >> 24;
70         pc_offset = pc & 0xffffff;
71         pc_cache = pc & 0xfff;
72
73         IAddr = ICache_Addr;
74         ICode = ICache_Code;
75
76         // cached - RAM
77         if (pc_bank == 0x80 || pc_bank == 0x00)
78         {
79                 if (SWAP32(*(uint32_t *)(IAddr + pc_cache)) == pc_offset)
80                 {
81                         // Cache hit - return last opcode used
82                         return *(uint32_t *)(ICode + pc_cache);
83                 }
84                 else
85                 {
86                         // Cache miss - addresses don't match
87                         // - default: 0xffffffff (not init)
88
89                         // cache line is 4 bytes wide
90                         pc_offset &= ~0xf;
91                         pc_cache &= ~0xf;
92
93                         // address line
94                         *(uint32_t *)(IAddr + pc_cache + 0x0) = SWAP32(pc_offset + 0x0);
95                         *(uint32_t *)(IAddr + pc_cache + 0x4) = SWAP32(pc_offset + 0x4);
96                         *(uint32_t *)(IAddr + pc_cache + 0x8) = SWAP32(pc_offset + 0x8);
97                         *(uint32_t *)(IAddr + pc_cache + 0xc) = SWAP32(pc_offset + 0xc);
98
99                         // opcode line
100                         pc_offset = pc & ~0xf;
101                         *(uint32_t *)(ICode + pc_cache + 0x0) = psxMu32ref(pc_offset + 0x0);
102                         *(uint32_t *)(ICode + pc_cache + 0x4) = psxMu32ref(pc_offset + 0x4);
103                         *(uint32_t *)(ICode + pc_cache + 0x8) = psxMu32ref(pc_offset + 0x8);
104                         *(uint32_t *)(ICode + pc_cache + 0xc) = psxMu32ref(pc_offset + 0xc);
105                 }
106         }
107
108         /*
109         TODO: Probably should add cached BIOS
110         */
111         // default
112         return fetchNoCache(pc);
113 }
114
// Instruction fetch hook used by psxDelayTest(), psxBranchNoDelay() and
// doBranch(); starts out pointing at the uncached fetch.
115 u32 (*fetch)(u32 pc) = fetchNoCache;
116
117 static void delayRead(int reg, u32 bpc) {
118         u32 rold, rnew;
119
120 //      SysPrintf("delayRead at %x!\n", psxRegs.pc);
121
122         rold = psxRegs.GPR.r[reg];
123         psxBSC[psxRegs.code >> 26](); // branch delay load
124         rnew = psxRegs.GPR.r[reg];
125
126         psxRegs.pc = bpc;
127
128         branch = 0;
129
130         psxRegs.GPR.r[reg] = rold;
131         execI(); // first branch opcode
132         psxRegs.GPR.r[reg] = rnew;
133
134         psxBranchTest();
135 }
136
137 static void delayWrite(int reg, u32 bpc) {
138
139 /*      SysPrintf("delayWrite at %x!\n", psxRegs.pc);
140
141         SysPrintf("%s\n", disR3000AF(psxRegs.code, psxRegs.pc-4));
142         SysPrintf("%s\n", disR3000AF(PSXMu32(bpc), bpc));*/
143
144         // no changes from normal behavior
145
146         psxBSC[psxRegs.code >> 26]();
147
148         branch = 0;
149         psxRegs.pc = bpc;
150
151         psxBranchTest();
152 }
153
154 static void delayReadWrite(int reg, u32 bpc) {
155
156 //      SysPrintf("delayReadWrite at %x!\n", psxRegs.pc);
157
158         // the branch delay load is skipped
159
160         branch = 0;
161         psxRegs.pc = bpc;
162
163         psxBranchTest();
164 }
165
// These defines decode fields of the local `tmp` opcode word of the
// next function (use them there instead of _Funct_...)
#define _tFunct_  ((tmp      ) & 0x3F)  // The funct part of the instruction register 
#define _tRd_     ((tmp >> 11) & 0x1F)  // The rd part of the instruction register 
#define _tRt_     ((tmp >> 16) & 0x1F)  // The rt part of the instruction register 
#define _tRs_     ((tmp >> 21) & 0x1F)  // The rs part of the instruction register 
#define _tSa_     ((tmp >>  6) & 0x1F)  // The sa part of the instruction register

/*
 * Classify how opcode `tmp` uses general register `reg`.
 *
 * Returns:
 *   0 - `reg` is not involved (or the check is deliberately disabled)
 *   1 - `reg` is both read and written
 *   2 - `reg` is read
 *   3 - `reg` is written only
 * psxDelayTest() maps 1/2/3 to delayReadWrite()/delayRead()/delayWrite().
 *
 * Opcodes not listed below fall through to 0.
 */
int psxTestLoadDelay(int reg, u32 tmp) {
	if (tmp == 0) return 0; // NOP
	switch (tmp >> 26) {
		case 0x00: // SPECIAL
			switch (_tFunct_) {
				case 0x00: // SLL
				case 0x02: case 0x03: // SRL/SRA
					if (_tRd_ == reg && _tRt_ == reg) return 1; else
					if (_tRt_ == reg) return 2; else
					if (_tRd_ == reg) return 3;
					break;

				case 0x08: // JR
					if (_tRs_ == reg) return 2;
					break;
				case 0x09: // JALR
					if (_tRd_ == reg && _tRs_ == reg) return 1; else
					if (_tRs_ == reg) return 2; else
					if (_tRd_ == reg) return 3;
					break;

				// SYSCALL/BREAK just a break;

				case 0x20: case 0x21: case 0x22: case 0x23:
				case 0x24: case 0x25: case 0x26: case 0x27: 
				case 0x2a: case 0x2b: // ADD/ADDU...
				case 0x04: case 0x06: case 0x07: // SLLV...
					if (_tRd_ == reg && (_tRt_ == reg || _tRs_ == reg)) return 1; else
					if (_tRt_ == reg || _tRs_ == reg) return 2; else
					if (_tRd_ == reg) return 3;
					break;

				case 0x10: case 0x12: // MFHI/MFLO
					if (_tRd_ == reg) return 3;
					break;
				case 0x11: case 0x13: // MTHI/MTLO
					if (_tRs_ == reg) return 2;
					break;

				case 0x18: case 0x19:
				case 0x1a: case 0x1b: // MULT/DIV...
					if (_tRt_ == reg || _tRs_ == reg) return 2;
					break;
			}
			break;

		case 0x01: // REGIMM - BLTZ/BGEZ...
		case 0x04: case 0x05: // BEQ/BNE
		case 0x06: case 0x07: // BLEZ/BGTZ
			// Xenogears - lbu v0 / beq v0
			// - no load delay (fixes battle loading)
			// The rs/rt reads of these branches are intentionally not
			// reported, so they always classify as 0.
			break;

		// J would be just a break;
		case 0x03: // JAL
			if (31 == reg) return 3;
			break;

		case 0x08: case 0x09: case 0x0a: case 0x0b:
		case 0x0c: case 0x0d: case 0x0e: // ADDI/ADDIU...
			if (_tRt_ == reg && _tRs_ == reg) return 1; else
			if (_tRs_ == reg) return 2; else
			if (_tRt_ == reg) return 3;
			break;

		case 0x0f: // LUI
			if (_tRt_ == reg) return 3;
			break;

		case 0x10: // COP0
			// The move variant is selected by the rs field (same decoding
			// as doBranch() and the COP2 case below), not by funct: funct
			// is 0 for all four moves, which made MTC0/CTC0 look like MFC0.
			switch (_tRs_) {
				case 0x00: // MFC0
					if (_tRt_ == reg) return 3;
					break;
				case 0x02: // CFC0
					if (_tRt_ == reg) return 3;
					break;
				case 0x04: // MTC0
					if (_tRt_ == reg) return 2;
					break;
				case 0x06: // CTC0
					if (_tRt_ == reg) return 2;
					break;
				// RFE just a break;
			}
			break;

		case 0x12: // COP2
			switch (_tFunct_) {
				case 0x00: 
					switch (_tRs_) {
						case 0x00: // MFC2
							if (_tRt_ == reg) return 3;
							break;
						case 0x02: // CFC2
							if (_tRt_ == reg) return 3;
							break;
						case 0x04: // MTC2
							if (_tRt_ == reg) return 2;
							break;
						case 0x06: // CTC2
							if (_tRt_ == reg) return 2;
							break;
					}
					break;
				// RTPS... break;
			}
			break;

		case 0x22: case 0x26: // LWL/LWR
			if (_tRt_ == reg) return 3; else
			if (_tRs_ == reg) return 2;
			break;

		case 0x20: case 0x21: case 0x23:
		case 0x24: case 0x25: // LB/LH/LW/LBU/LHU
			if (_tRt_ == reg && _tRs_ == reg) return 1; else
			if (_tRs_ == reg) return 2; else
			if (_tRt_ == reg) return 3;
			break;

		case 0x28: case 0x29: case 0x2a:
		case 0x2b: case 0x2e: // SB/SH/SWL/SW/SWR
			if (_tRt_ == reg || _tRs_ == reg) return 2;
			break;

		case 0x32: case 0x3a: // LWC2/SWC2
			if (_tRs_ == reg) return 2;
			break;
	}

	return 0;
}
329
330 void psxDelayTest(int reg, u32 bpc) {
331         u32 tmp = fetch(psxRegs.pc);
332         branch = 1;
333
334         switch (psxTestLoadDelay(reg, tmp)) {
335                 case 1:
336                         delayReadWrite(reg, bpc); return;
337                 case 2:
338                         delayRead(reg, bpc); return;
339                 case 3:
340                         delayWrite(reg, bpc); return;
341         }
342         psxBSC[psxRegs.code >> 26]();
343
344         branch = 0;
345         psxRegs.pc = bpc;
346
347         psxBranchTest();
348 }
349
350 static u32 psxBranchNoDelay(void) {
351         u32 temp;
352
353         psxRegs.code = fetch(psxRegs.pc);
354         switch (_Op_) {
355                 case 0x00: // SPECIAL
356                         switch (_Funct_) {
357                                 case 0x08: // JR
358                                         return _u32(_rRs_);
359                                 case 0x09: // JALR
360                                         temp = _u32(_rRs_);
361                                         if (_Rd_) { _SetLink(_Rd_); }
362                                         return temp;
363                         }
364                         break;
365                 case 0x01: // REGIMM
366                         switch (_Rt_) {
367                                 case 0x00: // BLTZ
368                                         if (_i32(_rRs_) < 0)
369                                                 return _BranchTarget_;
370                                         break;
371                                 case 0x01: // BGEZ
372                                         if (_i32(_rRs_) >= 0)
373                                                 return _BranchTarget_;
374                                         break;
375                                 case 0x08: // BLTZAL
376                                         if (_i32(_rRs_) < 0) {
377                                                 _SetLink(31);
378                                                 return _BranchTarget_;
379                                         }
380                                         break;
381                                 case 0x09: // BGEZAL
382                                         if (_i32(_rRs_) >= 0) {
383                                                 _SetLink(31);
384                                                 return _BranchTarget_;
385                                         }
386                                         break;
387                         }
388                         break;
389                 case 0x02: // J
390                         return _JumpTarget_;
391                 case 0x03: // JAL
392                         _SetLink(31);
393                         return _JumpTarget_;
394                 case 0x04: // BEQ
395                         if (_i32(_rRs_) == _i32(_rRt_))
396                                 return _BranchTarget_;
397                         break;
398                 case 0x05: // BNE
399                         if (_i32(_rRs_) != _i32(_rRt_))
400                                 return _BranchTarget_;
401                         break;
402                 case 0x06: // BLEZ
403                         if (_i32(_rRs_) <= 0)
404                                 return _BranchTarget_;
405                         break;
406                 case 0x07: // BGTZ
407                         if (_i32(_rRs_) > 0)
408                                 return _BranchTarget_;
409                         break;
410         }
411
412         return (u32)-1;
413 }
414
415 static int psxDelayBranchExec(u32 tar) {
416         execI();
417
418         branch = 0;
419         psxRegs.pc = tar;
420         psxRegs.cycle += BIAS;
421         psxBranchTest();
422         return 1;
423 }
424
425 static int psxDelayBranchTest(u32 tar1) {
426         u32 tar2, tmp1, tmp2;
427
428         tar2 = psxBranchNoDelay();
429         if (tar2 == (u32)-1)
430                 return 0;
431
432         debugI();
433
434         /*
435          * Branch in delay slot:
436          * - execute 1 instruction at tar1
437          * - jump to tar2 (target of branch in delay slot; this branch
438          *   has no normal delay slot, instruction at tar1 was fetched instead)
439          */
440         psxRegs.pc = tar1;
441         tmp1 = psxBranchNoDelay();
442         if (tmp1 == (u32)-1) {
443                 return psxDelayBranchExec(tar2);
444         }
445         debugI();
446         psxRegs.cycle += BIAS;
447
448         /*
449          * Got a branch at tar1:
450          * - execute 1 instruction at tar2
451          * - jump to target of that branch (tmp1)
452          */
453         psxRegs.pc = tar2;
454         tmp2 = psxBranchNoDelay();
455         if (tmp2 == (u32)-1) {
456                 return psxDelayBranchExec(tmp1);
457         }
458         debugI();
459         psxRegs.cycle += BIAS;
460
461         /*
462          * Got a branch at tar2:
463          * - execute 1 instruction at tmp1
464          * - jump to target of that branch (tmp2)
465          */
466         psxRegs.pc = tmp1;
467         return psxDelayBranchExec(tmp2);
468 }
469
470 static void doBranch(u32 tar) {
471         u32 tmp;
472
473         branch2 = branch = 1;
474         branchPC = tar;
475
476         // check for branch in delay slot
477         if (psxDelayBranchTest(tar))
478                 return;
479
480         psxRegs.code = fetch(psxRegs.pc);
481
482         debugI();
483
484         psxRegs.pc += 4;
485         psxRegs.cycle += BIAS;
486
487         // check for load delay
488         tmp = psxRegs.code >> 26;
489         switch (tmp) {
490                 case 0x10: // COP0
491                         switch (_Rs_) {
492                                 case 0x00: // MFC0
493                                 case 0x02: // CFC0
494                                         psxDelayTest(_Rt_, branchPC);
495                                         return;
496                         }
497                         break;
498                 case 0x12: // COP2
499                         switch (_Funct_) {
500                                 case 0x00:
501                                         switch (_Rs_) {
502                                                 case 0x00: // MFC2
503                                                 case 0x02: // CFC2
504                                                         psxDelayTest(_Rt_, branchPC);
505                                                         return;
506                                         }
507                                         break;
508                         }
509                         break;
510                 case 0x32: // LWC2
511                         psxDelayTest(_Rt_, branchPC);
512                         return;
513                 default:
514                         if (tmp >= 0x20 && tmp <= 0x26) { // LB/LH/LWL/LW/LBU/LHU/LWR
515                                 psxDelayTest(_Rt_, branchPC);
516                                 return;
517                         }
518                         break;
519         }
520
521         psxBSC[psxRegs.code >> 26]();
522
523         branch = 0;
524         psxRegs.pc = branchPC;
525
526         psxBranchTest();
527 }
528
529 /*********************************************************
530 * Arithmetic with immediate operand                      *
531 * Format:  OP rt, rs, immediate                          *
532 *********************************************************/
533 void psxADDI()  { if (!_Rt_) return; _rRt_ = _u32(_rRs_) + _Imm_ ; }            // Rt = Rs + Im         (Exception on Integer Overflow)
534 void psxADDIU() { if (!_Rt_) return; _rRt_ = _u32(_rRs_) + _Imm_ ; }            // Rt = Rs + Im
535 void psxANDI()  { if (!_Rt_) return; _rRt_ = _u32(_rRs_) & _ImmU_; }            // Rt = Rs And Im
536 void psxORI()   { if (!_Rt_) return; _rRt_ = _u32(_rRs_) | _ImmU_; }            // Rt = Rs Or  Im
537 void psxXORI()  { if (!_Rt_) return; _rRt_ = _u32(_rRs_) ^ _ImmU_; }            // Rt = Rs Xor Im
538 void psxSLTI()  { if (!_Rt_) return; _rRt_ = _i32(_rRs_) < _Imm_ ; }            // Rt = Rs < Im         (Signed)
539 void psxSLTIU() { if (!_Rt_) return; _rRt_ = _u32(_rRs_) < ((u32)_Imm_); }              // Rt = Rs < Im         (Unsigned)
540
541 /*********************************************************
542 * Register arithmetic                                    *
543 * Format:  OP rd, rs, rt                                 *
544 *********************************************************/
545 void psxADD()   { if (!_Rd_) return; _rRd_ = _u32(_rRs_) + _u32(_rRt_); }       // Rd = Rs + Rt         (Exception on Integer Overflow)
546 void psxADDU()  { if (!_Rd_) return; _rRd_ = _u32(_rRs_) + _u32(_rRt_); }       // Rd = Rs + Rt
547 void psxSUB()   { if (!_Rd_) return; _rRd_ = _u32(_rRs_) - _u32(_rRt_); }       // Rd = Rs - Rt         (Exception on Integer Overflow)
548 void psxSUBU()  { if (!_Rd_) return; _rRd_ = _u32(_rRs_) - _u32(_rRt_); }       // Rd = Rs - Rt
549 void psxAND()   { if (!_Rd_) return; _rRd_ = _u32(_rRs_) & _u32(_rRt_); }       // Rd = Rs And Rt
550 void psxOR()    { if (!_Rd_) return; _rRd_ = _u32(_rRs_) | _u32(_rRt_); }       // Rd = Rs Or  Rt
551 void psxXOR()   { if (!_Rd_) return; _rRd_ = _u32(_rRs_) ^ _u32(_rRt_); }       // Rd = Rs Xor Rt
552 void psxNOR()   { if (!_Rd_) return; _rRd_ =~(_u32(_rRs_) | _u32(_rRt_)); }// Rd = Rs Nor Rt
553 void psxSLT()   { if (!_Rd_) return; _rRd_ = _i32(_rRs_) < _i32(_rRt_); }       // Rd = Rs < Rt         (Signed)
554 void psxSLTU()  { if (!_Rd_) return; _rRd_ = _u32(_rRs_) < _u32(_rRt_); }       // Rd = Rs < Rt         (Unsigned)
555
556 /*********************************************************
557 * Register mult/div & Register trap logic                *
558 * Format:  OP rs, rt                                     *
559 *********************************************************/
560 void psxDIV() {
561     if (!_i32(_rRt_)) {
562         _i32(_rHi_) = _i32(_rRs_);
563         if (_i32(_rRs_) & 0x80000000) {
564             _i32(_rLo_) = 1;
565         } else {
566             _i32(_rLo_) = 0xFFFFFFFF;
567         }
568 /*
569  * Notaz said that this was "not needed" for ARM platforms and could slow it down so let's disable for ARM. 
570  * This fixes a crash issue that can happen when running Amidog's CPU test.
571  * (It still stays stuck to a black screen but at least it doesn't crash anymore)
572  */
573 #if !defined(__arm__) && !defined(__aarch64__)
574     } else if (_i32(_rRs_) == 0x80000000 && _i32(_rRt_) == 0xFFFFFFFF) {
575         _i32(_rLo_) = 0x80000000;
576         _i32(_rHi_) = 0;
577 #endif
578     } else {
579         _i32(_rLo_) = _i32(_rRs_) / _i32(_rRt_);
580         _i32(_rHi_) = _i32(_rRs_) % _i32(_rRt_);
581     }
582 }
583
584 void psxDIV_stall() {
585         psxRegs.muldivBusyCycle = psxRegs.cycle + 37;
586         psxDIV();
587 }
588
589 void psxDIVU() {
590         if (_rRt_ != 0) {
591                 _rLo_ = _rRs_ / _rRt_;
592                 _rHi_ = _rRs_ % _rRt_;
593         }
594         else {
595                 _i32(_rLo_) = 0xffffffff;
596                 _i32(_rHi_) = _i32(_rRs_);
597         }
598 }
599
600 void psxDIVU_stall() {
601         psxRegs.muldivBusyCycle = psxRegs.cycle + 37;
602         psxDIVU();
603 }
604
605 void psxMULT() {
606         u64 res = (s64)((s64)_i32(_rRs_) * (s64)_i32(_rRt_));
607
608         psxRegs.GPR.n.lo = (u32)(res & 0xffffffff);
609         psxRegs.GPR.n.hi = (u32)((res >> 32) & 0xffffffff);
610 }
611
612 void psxMULT_stall() {
613         // approximate, but maybe good enough
614         u32 rs = _rRs_;
615         u32 lz = __builtin_clz(((rs ^ ((s32)rs >> 21)) | 1));
616         u32 c = 7 + (2 - (lz / 11)) * 4;
617         psxRegs.muldivBusyCycle = psxRegs.cycle + c;
618         psxMULT();
619 }
620
621 void psxMULTU() {
622         u64 res = (u64)((u64)_u32(_rRs_) * (u64)_u32(_rRt_));
623
624         psxRegs.GPR.n.lo = (u32)(res & 0xffffffff);
625         psxRegs.GPR.n.hi = (u32)((res >> 32) & 0xffffffff);
626 }
627
628 void psxMULTU_stall() {
629         // approximate, but maybe good enough
630         u32 lz = __builtin_clz(_rRs_ | 1);
631         u32 c = 7 + (2 - (lz / 11)) * 4;
632         psxRegs.muldivBusyCycle = psxRegs.cycle + c;
633         psxMULTU();
634 }
635
636 /*********************************************************
637 * Register branch logic                                  *
638 * Format:  OP rs, offset                                 *
639 *********************************************************/
640 #define RepZBranchi32(op)      if(_i32(_rRs_) op 0) doBranch(_BranchTarget_);
641 #define RepZBranchLinki32(op)  { _SetLink(31); if(_i32(_rRs_) op 0) { doBranch(_BranchTarget_); } }
642
643 void psxBGEZ()   { RepZBranchi32(>=) }      // Branch if Rs >= 0
644 void psxBGEZAL() { RepZBranchLinki32(>=) }  // Branch if Rs >= 0 and link
645 void psxBGTZ()   { RepZBranchi32(>) }       // Branch if Rs >  0
646 void psxBLEZ()   { RepZBranchi32(<=) }      // Branch if Rs <= 0
647 void psxBLTZ()   { RepZBranchi32(<) }       // Branch if Rs <  0
648 void psxBLTZAL() { RepZBranchLinki32(<) }   // Branch if Rs <  0 and link
649
650 /*********************************************************
651 * Shift arithmetic with constant shift                   *
652 * Format:  OP rd, rt, sa                                 *
653 *********************************************************/
654 void psxSLL() { if (!_Rd_) return; _u32(_rRd_) = _u32(_rRt_) << _Sa_; } // Rd = Rt << sa
655 void psxSRA() { if (!_Rd_) return; _i32(_rRd_) = _i32(_rRt_) >> _Sa_; } // Rd = Rt >> sa (arithmetic)
656 void psxSRL() { if (!_Rd_) return; _u32(_rRd_) = _u32(_rRt_) >> _Sa_; } // Rd = Rt >> sa (logical)
657
658 /*********************************************************
659 * Shift arithmetic with variant register shift           *
660 * Format:  OP rd, rt, rs                                 *
661 *********************************************************/
662 void psxSLLV() { if (!_Rd_) return; _u32(_rRd_) = _u32(_rRt_) << (_u32(_rRs_) & 0x1F); } // Rd = Rt << rs
663 void psxSRAV() { if (!_Rd_) return; _i32(_rRd_) = _i32(_rRt_) >> (_u32(_rRs_) & 0x1F); } // Rd = Rt >> rs (arithmetic)
664 void psxSRLV() { if (!_Rd_) return; _u32(_rRd_) = _u32(_rRt_) >> (_u32(_rRs_) & 0x1F); } // Rd = Rt >> rs (logical)
665
666 /*********************************************************
667 * Load higher 16 bits of the first word in GPR with imm  *
668 * Format:  OP rt, immediate                              *
669 *********************************************************/
670 void psxLUI() { if (!_Rt_) return; _u32(_rRt_) = psxRegs.code << 16; } // Upper halfword of Rt = Im
671
672 /*********************************************************
673 * Move from HI/LO to GPR                                 *
674 * Format:  OP rd                                         *
675 *********************************************************/
676 void psxMFHI() { if (!_Rd_) return; _rRd_ = _rHi_; } // Rd = Hi
677 void psxMFLO() { if (!_Rd_) return; _rRd_ = _rLo_; } // Rd = Lo
678
679 static void mflohiCheckStall(void)
680 {
681         u32 left = psxRegs.muldivBusyCycle - psxRegs.cycle;
682         if (left <= 37) {
683                 //printf("muldiv stall %u\n", left);
684                 psxRegs.cycle = psxRegs.muldivBusyCycle;
685         }
686 }
687
// MFHI that first waits out a pending mul/div busy period.
void psxMFHI_stall()
{
	mflohiCheckStall();
	psxMFHI();
}

// MFLO that first waits out a pending mul/div busy period.
void psxMFLO_stall()
{
	mflohiCheckStall();
	psxMFLO();
}
690
691 /*********************************************************
692 * Move to GPR to HI/LO & Register jump                   *
693 * Format:  OP rs                                         *
694 *********************************************************/
695 void psxMTHI() { _rHi_ = _rRs_; } // Hi = Rs
696 void psxMTLO() { _rLo_ = _rRs_; } // Lo = Rs
697
698 /*********************************************************
699 * Special purpose instructions                           *
700 * Format:  OP                                            *
701 *********************************************************/
702 void psxBREAK() {
703         psxRegs.pc -= 4;
704         psxException(0x24, branch);
705 }
706
707 void psxSYSCALL() {
708         psxRegs.pc -= 4;
709         psxException(0x20, branch);
710 }
711
712 void psxRFE() {
713 //      SysPrintf("psxRFE\n");
714         psxRegs.CP0.n.Status = (psxRegs.CP0.n.Status & 0xfffffff0) |
715                                                   ((psxRegs.CP0.n.Status & 0x3c) >> 2);
716         psxTestSWInts();
717 }
718
719 /*********************************************************
720 * Register branch logic                                  *
721 * Format:  OP rs, rt, offset                             *
722 *********************************************************/
723 #define RepBranchi32(op)      if(_i32(_rRs_) op _i32(_rRt_)) doBranch(_BranchTarget_);
724
725 void psxBEQ() { RepBranchi32(==) }  // Branch if Rs == Rt
726 void psxBNE() { RepBranchi32(!=) }  // Branch if Rs != Rt
727
728 /*********************************************************
729 * Jump to target                                         *
730 * Format:  OP target                                     *
731 *********************************************************/
732 void psxJ()   {               doBranch(_JumpTarget_); }
733 void psxJAL() { _SetLink(31); doBranch(_JumpTarget_); }
734
735 /*********************************************************
736 * Register jump                                          *
737 * Format:  OP rs, rd                                     *
738 *********************************************************/
739 void psxJR()   {
740         doBranch(_rRs_ & ~3);
741         psxJumpTest();
742 }
743
744 void psxJALR() {
745         u32 temp = _u32(_rRs_);
746         if (_Rd_) { _SetLink(_Rd_); }
747         doBranch(temp & ~3);
748 }
749
750 /*********************************************************
751 * Load and store for GPR                                 *
752 * Format:  OP rt, offset(base)                           *
753 *********************************************************/
754
755 #define _oB_ (_u32(_rRs_) + _Imm_)
756
757 void psxLB() {
758         if (_Rt_) {
759                 _i32(_rRt_) = (signed char)psxMemRead8(_oB_); 
760         } else {
761                 psxMemRead8(_oB_); 
762         }
763 }
764
765 void psxLBU() {
766         if (_Rt_) {
767                 _u32(_rRt_) = psxMemRead8(_oB_);
768         } else {
769                 psxMemRead8(_oB_); 
770         }
771 }
772
773 void psxLH() {
774         if (_Rt_) {
775                 _i32(_rRt_) = (short)psxMemRead16(_oB_);
776         } else {
777                 psxMemRead16(_oB_);
778         }
779 }
780
781 void psxLHU() {
782         if (_Rt_) {
783                 _u32(_rRt_) = psxMemRead16(_oB_);
784         } else {
785                 psxMemRead16(_oB_);
786         }
787 }
788
789 void psxLW() {
790         if (_Rt_) {
791                 _u32(_rRt_) = psxMemRead32(_oB_);
792         } else {
793                 psxMemRead32(_oB_);
794         }
795 }
796
797 u32 LWL_MASK[4] = { 0xffffff, 0xffff, 0xff, 0 };
798 u32 LWL_SHIFT[4] = { 24, 16, 8, 0 };
799
800 void psxLWL() {
801         u32 addr = _oB_;
802         u32 shift = addr & 3;
803         u32 mem = psxMemRead32(addr & ~3);
804
805         if (!_Rt_) return;
806         _u32(_rRt_) =   ( _u32(_rRt_) & LWL_MASK[shift]) | 
807                                         ( mem << LWL_SHIFT[shift]);
808
809         /*
810         Mem = 1234.  Reg = abcd
811
812         0   4bcd   (mem << 24) | (reg & 0x00ffffff)
813         1   34cd   (mem << 16) | (reg & 0x0000ffff)
814         2   234d   (mem <<  8) | (reg & 0x000000ff)
815         3   1234   (mem      ) | (reg & 0x00000000)
816         */
817 }
818
819 u32 LWR_MASK[4] = { 0, 0xff000000, 0xffff0000, 0xffffff00 };
820 u32 LWR_SHIFT[4] = { 0, 8, 16, 24 };
821
822 void psxLWR() {
823         u32 addr = _oB_;
824         u32 shift = addr & 3;
825         u32 mem = psxMemRead32(addr & ~3);
826
827         if (!_Rt_) return;
828         _u32(_rRt_) =   ( _u32(_rRt_) & LWR_MASK[shift]) | 
829                                         ( mem >> LWR_SHIFT[shift]);
830
831         /*
832         Mem = 1234.  Reg = abcd
833
834         0   1234   (mem      ) | (reg & 0x00000000)
835         1   a123   (mem >>  8) | (reg & 0xff000000)
836         2   ab12   (mem >> 16) | (reg & 0xffff0000)
837         3   abc1   (mem >> 24) | (reg & 0xffffff00)
838         */
839 }
840
841 void psxSB() { psxMemWrite8 (_oB_, _rRt_ &   0xff); }
842 void psxSH() { psxMemWrite16(_oB_, _rRt_ & 0xffff); }
843 void psxSW() { psxMemWrite32(_oB_, _rRt_); }
844
845 u32 SWL_MASK[4] = { 0xffffff00, 0xffff0000, 0xff000000, 0 };
846 u32 SWL_SHIFT[4] = { 24, 16, 8, 0 };
847
848 void psxSWL() {
849         u32 addr = _oB_;
850         u32 shift = addr & 3;
851         u32 mem = psxMemRead32(addr & ~3);
852
853         psxMemWrite32(addr & ~3,  (_u32(_rRt_) >> SWL_SHIFT[shift]) |
854                              (  mem & SWL_MASK[shift]) );
855         /*
856         Mem = 1234.  Reg = abcd
857
858         0   123a   (reg >> 24) | (mem & 0xffffff00)
859         1   12ab   (reg >> 16) | (mem & 0xffff0000)
860         2   1abc   (reg >>  8) | (mem & 0xff000000)
861         3   abcd   (reg      ) | (mem & 0x00000000)
862         */
863 }
864
865 u32 SWR_MASK[4] = { 0, 0xff, 0xffff, 0xffffff };
866 u32 SWR_SHIFT[4] = { 0, 8, 16, 24 };
867
868 void psxSWR() {
869         u32 addr = _oB_;
870         u32 shift = addr & 3;
871         u32 mem = psxMemRead32(addr & ~3);
872
873         psxMemWrite32(addr & ~3,  (_u32(_rRt_) << SWR_SHIFT[shift]) |
874                              (  mem & SWR_MASK[shift]) );
875
876         /*
877         Mem = 1234.  Reg = abcd
878
879         0   abcd   (reg      ) | (mem & 0x00000000)
880         1   bcd4   (reg <<  8) | (mem & 0x000000ff)
881         2   cd34   (reg << 16) | (mem & 0x0000ffff)
882         3   d234   (reg << 24) | (mem & 0x00ffffff)
883         */
884 }
885
886 /*********************************************************
887 * Moves between GPR and COPx                             *
888 * Format:  OP rt, fs                                     *
889 *********************************************************/
890 void psxMFC0() { if (!_Rt_) return; _i32(_rRt_) = (int)_rFs_; }
891 void psxCFC0() { if (!_Rt_) return; _i32(_rRt_) = (int)_rFs_; }
892
893 void psxTestSWInts() {
894         if (psxRegs.CP0.n.Cause & psxRegs.CP0.n.Status & 0x0300 &&
895            psxRegs.CP0.n.Status & 0x1) {
896                 psxRegs.CP0.n.Cause &= ~0x7c;
897                 psxException(psxRegs.CP0.n.Cause, branch);
898         }
899 }
900
901 void MTC0(int reg, u32 val) {
902 //      SysPrintf("MTC0 %d: %x\n", reg, val);
903         switch (reg) {
904                 case 12: // Status
905                         psxRegs.CP0.r[12] = val;
906                         psxTestSWInts();
907                         break;
908
909                 case 13: // Cause
910                         psxRegs.CP0.n.Cause &= ~0x0300;
911                         psxRegs.CP0.n.Cause |= val & 0x0300;
912                         psxTestSWInts();
913                         break;
914
915                 default:
916                         psxRegs.CP0.r[reg] = val;
917                         break;
918         }
919 }
920
921 void psxMTC0() { MTC0(_Rd_, _u32(_rRt_)); }
922 void psxCTC0() { MTC0(_Rd_, _u32(_rRt_)); }
923
924 /*********************************************************
925 * Unknown instruction (would generate an exception)      *
926 * Format:  ?                                             *
927 *********************************************************/
/*
 * Handler for opcodes with no implementation. It only logs the raw opcode
 * when PSXCPU_LOG is defined; otherwise it does nothing.
 */
void psxNULL() {
#if defined(PSXCPU_LOG)
	PSXCPU_LOG("psx: Unimplemented op %x\n", psxRegs.code);
#endif
}
933
934 void psxSPECIAL() {
935         psxSPC[_Funct_]();
936 }
937
938 void psxREGIMM() {
939         psxREG[_Rt_]();
940 }
941
942 void psxCOP0() {
943         psxCP0[_Rs_]();
944 }
945
946 void psxCOP2() {
947         psxCP2[_Funct_]((struct psxCP2Regs *)&psxRegs.CP2D);
948 }
949
950 void psxCOP2_stall() {
951         u32 f = _Funct_;
952         gteCheckStall(f);
953         psxCP2[f]((struct psxCP2Regs *)&psxRegs.CP2D);
954 }
955
956 void psxBASIC(struct psxCP2Regs *regs) {
957         psxCP2BSC[_Rs_]();
958 }
959
960 void psxHLE() {
961 //      psxHLEt[psxRegs.code & 0xffff]();
962 //      psxHLEt[psxRegs.code & 0x07]();         // HDHOSHY experimental patch
963     uint32_t hleCode = psxRegs.code & 0x03ffffff;
964     if (hleCode >= (sizeof(psxHLEt) / sizeof(psxHLEt[0]))) {
965         psxNULL();
966     } else {
967         psxHLEt[hleCode]();
968     }
969 }
970
/*
 * Primary dispatch table, indexed by the top 6 bits of the opcode
 * (execI() calls psxBSC[psxRegs.code >> 26]). Entries 18, 50 and 58 are
 * rewritten at runtime by intApplyConfig() to select the stall or
 * non-stall handler. Trailing comments give the hex index of each row's
 * first entry.
 */
void (*psxBSC[64])() = {
        psxSPECIAL, psxREGIMM, psxJ   , psxJAL  , psxBEQ , psxBNE , psxBLEZ, psxBGTZ, // 00
        psxADDI   , psxADDIU , psxSLTI, psxSLTIU, psxANDI, psxORI , psxXORI, psxLUI , // 08
        psxCOP0   , psxNULL  , psxCOP2, psxNULL , psxNULL, psxNULL, psxNULL, psxNULL, // 10
        psxNULL   , psxNULL  , psxNULL, psxNULL , psxNULL, psxNULL, psxNULL, psxNULL, // 18
        psxLB     , psxLH    , psxLWL , psxLW   , psxLBU , psxLHU , psxLWR , psxNULL, // 20
        psxSB     , psxSH    , psxSWL , psxSW   , psxNULL, psxNULL, psxSWR , psxNULL, // 28
        psxNULL   , psxNULL  , gteLWC2, psxNULL , psxNULL, psxNULL, psxNULL, psxNULL, // 30
        psxNULL   , psxNULL  , gteSWC2, psxHLE  , psxNULL, psxNULL, psxNULL, psxNULL  // 38
};
981
982
/*
 * Secondary dispatch table used by psxSPECIAL(), indexed by _Funct_.
 * Entries 16, 18 and 24..27 are rewritten at runtime by intApplyConfig()
 * to select the stall or non-stall handler. Trailing comments give the
 * hex index of each row's first entry.
 */
void (*psxSPC[64])() = {
        psxSLL , psxNULL , psxSRL , psxSRA , psxSLLV   , psxNULL , psxSRLV, psxSRAV, // 00
        psxJR  , psxJALR , psxNULL, psxNULL, psxSYSCALL, psxBREAK, psxNULL, psxNULL, // 08
        psxMFHI, psxMTHI , psxMFLO, psxMTLO, psxNULL   , psxNULL , psxNULL, psxNULL, // 10
        psxMULT, psxMULTU, psxDIV , psxDIVU, psxNULL   , psxNULL , psxNULL, psxNULL, // 18
        psxADD , psxADDU , psxSUB , psxSUBU, psxAND    , psxOR   , psxXOR , psxNOR , // 20
        psxNULL, psxNULL , psxSLT , psxSLTU, psxNULL   , psxNULL , psxNULL, psxNULL, // 28
        psxNULL, psxNULL , psxNULL, psxNULL, psxNULL   , psxNULL , psxNULL, psxNULL, // 30
        psxNULL, psxNULL , psxNULL, psxNULL, psxNULL   , psxNULL , psxNULL, psxNULL  // 38
};
993
/*
 * Dispatch table used by psxREGIMM(), indexed by _Rt_.
 * Trailing comments give the hex index of each row's first entry.
 */
void (*psxREG[32])() = {
        psxBLTZ  , psxBGEZ  , psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, // 00
        psxNULL  , psxNULL  , psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, // 08
        psxBLTZAL, psxBGEZAL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, // 10
        psxNULL  , psxNULL  , psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL  // 18
};
1000
/*
 * Dispatch table used by psxCOP0(), indexed by _Rs_.
 * Trailing comments give the hex index of each row's first entry.
 */
void (*psxCP0[32])() = {
        psxMFC0, psxNULL, psxCFC0, psxNULL, psxMTC0, psxNULL, psxCTC0, psxNULL, // 00
        psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, // 08
        psxRFE , psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, // 10
        psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL  // 18
};
1007
/*
 * Dispatch table used by psxCOP2() and psxCOP2_stall(), indexed by _Funct_.
 * Every handler is called with a pointer to the CP2 register block.
 * Trailing comments give the hex index of each row's first entry.
 */
void (*psxCP2[64])(struct psxCP2Regs *regs) = {
        psxBASIC, gteRTPS , psxNULL , psxNULL, psxNULL, psxNULL , gteNCLIP, psxNULL, // 00
        psxNULL , psxNULL , psxNULL , psxNULL, gteOP  , psxNULL , psxNULL , psxNULL, // 08
        gteDPCS , gteINTPL, gteMVMVA, gteNCDS, gteCDP , psxNULL , gteNCDT , psxNULL, // 10
        psxNULL , psxNULL , psxNULL , gteNCCS, gteCC  , psxNULL , gteNCS  , psxNULL, // 18
        gteNCT  , psxNULL , psxNULL , psxNULL, psxNULL, psxNULL , psxNULL , psxNULL, // 20
        gteSQR  , gteDCPL , gteDPCT , psxNULL, psxNULL, gteAVSZ3, gteAVSZ4, psxNULL, // 28 
        gteRTPT , psxNULL , psxNULL , psxNULL, psxNULL, psxNULL , psxNULL , psxNULL, // 30
        psxNULL , psxNULL , psxNULL , psxNULL, psxNULL, gteGPF  , gteGPL  , gteNCCT  // 38
};
1018
/*
 * Dispatch table used by psxBASIC(), indexed by _Rs_.
 * Trailing comments give the hex index of each row's first entry.
 */
void (*psxCP2BSC[32])() = {
        gteMFC2, psxNULL, gteCFC2, psxNULL, gteMTC2, psxNULL, gteCTC2, psxNULL, // 00
        psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, // 08
        psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, // 10
        psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL  // 18
};
1025
1026
1027 ///////////////////////////////////////////
1028
1029 static int intInit() {
1030         /* We have to allocate the icache memory even if 
1031          * the user has not enabled it as otherwise it can cause issues.
1032          */
1033         if (!ICache_Addr)
1034         {
1035                 ICache_Addr = malloc(0x1000);
1036                 if (!ICache_Addr)
1037                 {
1038                         return -1;
1039                 }
1040         }
1041
1042         if (!ICache_Code)
1043         {
1044                 ICache_Code = malloc(0x1000);
1045                 if (!ICache_Code)
1046                 {
1047                         return -1;
1048                 }
1049         }
1050         memset(ICache_Addr, 0xff, 0x1000);
1051         memset(ICache_Code, 0xff, 0x1000);
1052         return 0;
1053 }
1054
1055 static void intReset() {
1056         memset(ICache_Addr, 0xff, 0x1000);
1057         memset(ICache_Code, 0xff, 0x1000);
1058 }
1059
/* Execute instructions one at a time until the external `stop` flag is set. */
void intExecute() {
	extern int stop;

	while (!stop) {
		execI();
	}
}
1065
1066 void intExecuteBlock() {
1067         branch2 = 0;
1068         while (!branch2) execI();
1069 }
1070
1071 static void intClear(u32 Addr, u32 Size) {
1072 }
1073
1074 void intNotify (int note, void *data) {
1075         /* Gameblabla - Only clear the icache if it's isolated */
1076         if (note == R3000ACPU_NOTIFY_CACHE_ISOLATED)
1077         {
1078                 memset(ICache_Addr, 0xff, 0x1000);
1079                 memset(ICache_Code, 0xff, 0x1000);
1080         }
1081 }
1082
1083 void intApplyConfig() {
1084         assert(psxBSC[18] == psxCOP2  || psxBSC[18] == psxCOP2_stall);
1085         assert(psxBSC[50] == gteLWC2  || psxBSC[50] == gteLWC2_stall);
1086         assert(psxBSC[58] == gteSWC2  || psxBSC[58] == gteSWC2_stall);
1087         assert(psxSPC[16] == psxMFHI  || psxSPC[16] == psxMFHI_stall);
1088         assert(psxSPC[18] == psxMFLO  || psxSPC[18] == psxMFLO_stall);
1089         assert(psxSPC[24] == psxMULT  || psxSPC[24] == psxMULT_stall);
1090         assert(psxSPC[25] == psxMULTU || psxSPC[25] == psxMULTU_stall);
1091         assert(psxSPC[26] == psxDIV   || psxSPC[26] == psxDIV_stall);
1092         assert(psxSPC[27] == psxDIVU  || psxSPC[27] == psxDIVU_stall);
1093
1094         if (Config.DisableStalls) {
1095                 psxBSC[18] = psxCOP2;
1096                 psxBSC[50] = gteLWC2;
1097                 psxBSC[58] = gteSWC2;
1098                 psxSPC[16] = psxMFHI;
1099                 psxSPC[18] = psxMFLO;
1100                 psxSPC[24] = psxMULT;
1101                 psxSPC[25] = psxMULTU;
1102                 psxSPC[26] = psxDIV;
1103                 psxSPC[27] = psxDIVU;
1104         } else {
1105                 psxBSC[18] = psxCOP2_stall;
1106                 psxBSC[50] = gteLWC2_stall;
1107                 psxBSC[58] = gteSWC2_stall;
1108                 psxSPC[16] = psxMFHI_stall;
1109                 psxSPC[18] = psxMFLO_stall;
1110                 psxSPC[24] = psxMULT_stall;
1111                 psxSPC[25] = psxMULTU_stall;
1112                 psxSPC[26] = psxDIV_stall;
1113                 psxSPC[27] = psxDIVU_stall;
1114         }
1115
1116         // dynarec may occasionally call the interpreter, in such a case the
1117         // cache won't work (cache only works right if all fetches go through it)
1118         if (!Config.icache_emulation || psxCpu != &psxInt)
1119                 fetch = fetchNoCache;
1120         else
1121                 fetch = fetchICache;
1122 }
1123
1124 static void intShutdown() {
1125         if (ICache_Addr)
1126         {
1127                 free(ICache_Addr);
1128                 ICache_Addr = NULL;
1129         }
1130
1131         if (ICache_Code)
1132         {
1133                 free(ICache_Code);
1134                 ICache_Code = NULL;
1135         }
1136 }
1137
1138 // interpreter execution
1139 void execI() {
1140         psxRegs.code = fetch(psxRegs.pc);
1141
1142         debugI();
1143
1144         if (Config.Debug) ProcessDebug();
1145
1146         psxRegs.pc += 4;
1147         psxRegs.cycle += BIAS;
1148
1149         psxBSC[psxRegs.code >> 26]();
1150 }
1151
/*
 * Interpreter CPU backend: the set of entry points defined in this file,
 * listed in the positional order of the R3000Acpu members.
 * NOTE(review): member names are declared elsewhere (presumably r3000a.h);
 * the per-line comments below describe the functions, not the field names.
 */
R3000Acpu psxInt = {
        intInit,         // allocate and clear the icache buffers
        intReset,        // clear the icache buffers
        intExecute,      // run until `stop` is set
        intExecuteBlock, // run until `branch2` is set
        intClear,        // no-op
        intNotify,       // clears the icache on R3000ACPU_NOTIFY_CACHE_ISOLATED
        intApplyConfig,  // select stall/non-stall handlers and the fetcher
        intShutdown      // free the icache buffers
};