drc/gte: add some stall handling
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / emu_if.c
1 /*
2  * (C) Gražvydas "notaz" Ignotas, 2010-2011
3  *
4  * This work is licensed under the terms of GNU GPL version 2 or later.
5  * See the COPYING file in the top-level directory.
6  */
7
8 #include <stdio.h>
9
10 #include "emu_if.h"
11 #include "pcsxmem.h"
12 #include "../psxhle.h"
13 #include "../r3000a.h"
14 #include "../cdrom.h"
15 #include "../psxdma.h"
16 #include "../mdec.h"
17 #include "../gte_arm.h"
18 #include "../gte_neon.h"
19 #define FLAGLESS
20 #include "../gte.h"
21
/*
 * Number of elements in a true array (must not be used on a pointer).
 * The argument is parenthesized before indexing so that any expression
 * yielding an array expands correctly.
 */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
23
24 //#define evprintf printf
25 #define evprintf(...)
26
27 char invalid_code[0x100000];
28 u32 event_cycles[PSXINT_COUNT];
29
30 static void schedule_timeslice(void)
31 {
32         u32 i, c = psxRegs.cycle;
33         u32 irqs = psxRegs.interrupt;
34         s32 min, dif;
35
36         min = PSXCLK;
37         for (i = 0; irqs != 0; i++, irqs >>= 1) {
38                 if (!(irqs & 1))
39                         continue;
40                 dif = event_cycles[i] - c;
41                 //evprintf("  ev %d\n", dif);
42                 if (0 < dif && dif < min)
43                         min = dif;
44         }
45         next_interupt = c + min;
46 }
47
48 typedef void (irq_func)();
49
50 static irq_func * const irq_funcs[] = {
51         [PSXINT_SIO]    = sioInterrupt,
52         [PSXINT_CDR]    = cdrInterrupt,
53         [PSXINT_CDREAD] = cdrReadInterrupt,
54         [PSXINT_GPUDMA] = gpuInterrupt,
55         [PSXINT_MDECOUTDMA] = mdec1Interrupt,
56         [PSXINT_SPUDMA] = spuInterrupt,
57         [PSXINT_MDECINDMA] = mdec0Interrupt,
58         [PSXINT_GPUOTCDMA] = gpuotcInterrupt,
59         [PSXINT_CDRDMA] = cdrDmaInterrupt,
60         [PSXINT_CDRLID] = cdrLidSeekInterrupt,
61         [PSXINT_CDRPLAY] = cdrPlayInterrupt,
62         [PSXINT_SPU_UPDATE] = spuUpdate,
63         [PSXINT_RCNT] = psxRcntUpdate,
64 };
65
66 /* local dupe of psxBranchTest, using event_cycles */
67 static void irq_test(void)
68 {
69         u32 irqs = psxRegs.interrupt;
70         u32 cycle = psxRegs.cycle;
71         u32 irq, irq_bits;
72
73         // irq_funcs() may queue more irqs
74         psxRegs.interrupt = 0;
75
76         for (irq = 0, irq_bits = irqs; irq_bits != 0; irq++, irq_bits >>= 1) {
77                 if (!(irq_bits & 1))
78                         continue;
79                 if ((s32)(cycle - event_cycles[irq]) >= 0) {
80                         irqs &= ~(1 << irq);
81                         irq_funcs[irq]();
82                 }
83         }
84         psxRegs.interrupt |= irqs;
85
86         if ((psxHu32(0x1070) & psxHu32(0x1074)) && (Status & 0x401) == 0x401) {
87                 psxException(0x400, 0);
88                 pending_exception = 1;
89         }
90 }
91
92 void gen_interupt()
93 {
94         evprintf("  +ge %08x, %u->%u\n", psxRegs.pc, psxRegs.cycle, next_interupt);
95
96         irq_test();
97         //psxBranchTest();
98         //pending_exception = 1;
99
100         schedule_timeslice();
101
102         evprintf("  -ge %08x, %u->%u (%d)\n", psxRegs.pc, psxRegs.cycle,
103                 next_interupt, next_interupt - psxRegs.cycle);
104 }
105
106 // from interpreter
107 extern void MTC0(int reg, u32 val);
108
109 void pcsx_mtc0(u32 reg, u32 val)
110 {
111         evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
112         MTC0(reg, val);
113         gen_interupt();
114         if (Cause & Status & 0x0300) // possible sw irq
115                 pending_exception = 1;
116 }
117
118 void pcsx_mtc0_ds(u32 reg, u32 val)
119 {
120         evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
121         MTC0(reg, val);
122 }
123
124 void new_dyna_before_save(void)
125 {
126         psxRegs.interrupt &= ~(1 << PSXINT_RCNT); // old savestate compat
127
128         // psxRegs.intCycle is always maintained, no need to convert
129 }
130
131 void new_dyna_after_save(void)
132 {
133         psxRegs.interrupt |= 1 << PSXINT_RCNT;
134 }
135
136 static void new_dyna_restore(void)
137 {
138         int i;
139         for (i = 0; i < PSXINT_COUNT; i++)
140                 event_cycles[i] = psxRegs.intCycle[i].sCycle + psxRegs.intCycle[i].cycle;
141
142         event_cycles[PSXINT_RCNT] = psxNextsCounter + psxNextCounter;
143         psxRegs.interrupt |=  1 << PSXINT_RCNT;
144         psxRegs.interrupt &= (1 << PSXINT_COUNT) - 1;
145
146         new_dyna_pcsx_mem_load_state();
147 }
148
149 void new_dyna_freeze(void *f, int mode)
150 {
151         const char header_save[8] = "ariblks";
152         uint32_t addrs[1024 * 4];
153         int32_t size = 0;
154         int bytes;
155         char header[8];
156
157         if (mode != 0) { // save
158                 size = new_dynarec_save_blocks(addrs, sizeof(addrs));
159                 if (size == 0)
160                         return;
161
162                 SaveFuncs.write(f, header_save, sizeof(header_save));
163                 SaveFuncs.write(f, &size, sizeof(size));
164                 SaveFuncs.write(f, addrs, size);
165         }
166         else {
167                 new_dyna_restore();
168
169                 bytes = SaveFuncs.read(f, header, sizeof(header));
170                 if (bytes != sizeof(header) || strcmp(header, header_save)) {
171                         if (bytes > 0)
172                                 SaveFuncs.seek(f, -bytes, SEEK_CUR);
173                         return;
174                 }
175                 SaveFuncs.read(f, &size, sizeof(size));
176                 if (size <= 0)
177                         return;
178                 if (size > sizeof(addrs)) {
179                         bytes = size - sizeof(addrs);
180                         SaveFuncs.seek(f, bytes, SEEK_CUR);
181                         size = sizeof(addrs);
182                 }
183                 bytes = SaveFuncs.read(f, addrs, size);
184                 if (bytes != size)
185                         return;
186
187                 if (psxCpu != &psxInt)
188                         new_dynarec_load_blocks(addrs, size);
189         }
190
191         //printf("drc: %d block info entries %s\n", size/8, mode ? "saved" : "loaded");
192 }
193
194 #ifndef DRC_DISABLE
195
196 /* GTE stuff */
197 void *gte_handlers[64];
198
199 void *gte_handlers_nf[64] = {
200         NULL      , gteRTPS_nf , NULL       , NULL      , NULL     , NULL       , gteNCLIP_nf, NULL      , // 00
201         NULL      , NULL       , NULL       , NULL      , gteOP_nf , NULL       , NULL       , NULL      , // 08
202         gteDPCS_nf, gteINTPL_nf, gteMVMVA_nf, gteNCDS_nf, gteCDP_nf, NULL       , gteNCDT_nf , NULL      , // 10
203         NULL      , NULL       , NULL       , gteNCCS_nf, gteCC_nf , NULL       , gteNCS_nf  , NULL      , // 18
204         gteNCT_nf , NULL       , NULL       , NULL      , NULL     , NULL       , NULL       , NULL      , // 20
205         gteSQR_nf , gteDCPL_nf , gteDPCT_nf , NULL      , NULL     , gteAVSZ3_nf, gteAVSZ4_nf, NULL      , // 28 
206         gteRTPT_nf, NULL       , NULL       , NULL      , NULL     , NULL       , NULL       , NULL      , // 30
207         NULL      , NULL       , NULL       , NULL      , NULL     , gteGPF_nf  , gteGPL_nf  , gteNCCT_nf, // 38
208 };
209
/*
 * Mnemonic for each GTE opcode (0x00-0x3f), laid out like
 * gte_handlers_nf[]. Opcodes that are not listed stay NULL.
 */
const char *gte_regnames[64] = {
	[0x01] = "RTPS",
	[0x06] = "NCLIP",
	[0x0c] = "OP",
	[0x10] = "DPCS",
	[0x11] = "INTPL",
	[0x12] = "MVMVA",
	[0x13] = "NCDS",
	[0x14] = "CDP",
	[0x16] = "NCDT",
	[0x1b] = "NCCS",
	[0x1c] = "CC",
	[0x1e] = "NCS",
	[0x20] = "NCT",
	[0x28] = "SQR",
	[0x29] = "DCPL",
	[0x2a] = "DPCT",
	[0x2d] = "AVSZ3",
	[0x2e] = "AVSZ4",
	[0x30] = "RTPT",
	[0x3d] = "GPF",
	[0x3e] = "GPL",
	[0x3f] = "NCCT",
};
220
/*
 * Helpers for building the 64-bit register masks used by
 * gte_reg_reads[] / gte_reg_writes[]:
 *   GDBIT(x)  - bit x        (low 32 bits of the mask)
 *   GCBIT(x)  - bit 32 + x   (high 32 bits of the mask)
 *   G?BITSn() - OR of n such bits
 * NOTE(review): "D"/"C" presumably mean GTE data/control registers - confirm.
 *
 * Arguments are parenthesized so that expressions such as GCBIT(1 << 1)
 * expand as intended. Results are signed long long, so x must stay
 * below 63 (GDBIT) / 31 (GCBIT).
 */
#define GCBIT(x) \
	(1ll << (32 + (x)))
#define GDBIT(x) \
	(1ll << (x))
#define GCBITS3(b0,b1,b2) \
	(GCBIT(b0) | GCBIT(b1) | GCBIT(b2))
#define GDBITS2(b0,b1) \
	(GDBIT(b0) | GDBIT(b1))
#define GDBITS3(b0,b1,b2) \
	(GDBITS2(b0,b1) | GDBIT(b2))
#define GDBITS4(b0,b1,b2,b3) \
	(GDBITS3(b0,b1,b2) | GDBIT(b3))
#define GDBITS5(b0,b1,b2,b3,b4) \
	(GDBITS4(b0,b1,b2,b3) | GDBIT(b4))
#define GDBITS6(b0,b1,b2,b3,b4,b5) \
	(GDBITS5(b0,b1,b2,b3,b4) | GDBIT(b5))
#define GDBITS7(b0,b1,b2,b3,b4,b5,b6) \
	(GDBITS6(b0,b1,b2,b3,b4,b5) | GDBIT(b6))
#define GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) \
	(GDBITS7(b0,b1,b2,b3,b4,b5,b6) | GDBIT(b7))
#define GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) \
	(GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) | GDBIT(b8))
#define GDBITS10(b0,b1,b2,b3,b4,b5,b6,b7,b8,b9) \
	(GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) | GDBIT(b9))
245
246 const uint64_t gte_reg_reads[64] = {
247         [GTE_RTPS]  = 0x1f0000ff00000000ll | GDBITS7(0,1,13,14,17,18,19),
248         [GTE_NCLIP] =                        GDBITS3(12,13,14),
249         [GTE_OP]    = GCBITS3(0,2,4)       | GDBITS3(9,10,11),
250         [GTE_DPCS]  = GCBITS3(21,22,23)    | GDBITS4(6,8,21,22),
251         [GTE_INTPL] = GCBITS3(21,22,23)    | GDBITS7(6,8,9,10,11,21,22),
252         [GTE_MVMVA] = 0x00ffffff00000000ll | GDBITS9(0,1,2,3,4,5,9,10,11), // XXX: maybe decode further?
253         [GTE_NCDS]  = 0x00ffff0000000000ll | GDBITS6(0,1,6,8,21,22),
254         [GTE_CDP]   = 0x00ffe00000000000ll | GDBITS7(6,8,9,10,11,21,22),
255         [GTE_NCDT]  = 0x00ffff0000000000ll | GDBITS8(0,1,2,3,4,5,6,8),
256         [GTE_NCCS]  = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22),
257         [GTE_CC]    = 0x001fe00000000000ll | GDBITS6(6,9,10,11,21,22),
258         [GTE_NCS]   = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22),
259         [GTE_NCT]   = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
260         [GTE_SQR]   =                        GDBITS3(9,10,11),
261         [GTE_DCPL]  = GCBITS3(21,22,23)    | GDBITS7(6,8,9,10,11,21,22),
262         [GTE_DPCT]  = GCBITS3(21,22,23)    | GDBITS4(8,20,21,22),
263         [GTE_AVSZ3] = GCBIT(29)            | GDBITS3(17,18,19),
264         [GTE_AVSZ4] = GCBIT(30)            | GDBITS4(16,17,18,19),
265         [GTE_RTPT]  = 0x1f0000ff00000000ll | GDBITS7(0,1,2,3,4,5,19),
266         [GTE_GPF]   =                        GDBITS7(6,8,9,10,11,21,22),
267         [GTE_GPL]   =                        GDBITS10(6,8,9,10,11,21,22,25,26,27),
268         [GTE_NCCT]  = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
269 };
270
271 // note: this excludes gteFLAG that is always written to
272 const uint64_t gte_reg_writes[64] = {
273         [GTE_RTPS]  = 0x0f0f7f00ll,
274         [GTE_NCLIP] = GDBIT(24),
275         [GTE_OP]    = GDBITS6(9,10,11,25,26,27),
276         [GTE_DPCS]  = GDBITS9(9,10,11,20,21,22,25,26,27),
277         [GTE_INTPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
278         [GTE_MVMVA] = GDBITS6(9,10,11,25,26,27),
279         [GTE_NCDS]  = GDBITS9(9,10,11,20,21,22,25,26,27),
280         [GTE_CDP]   = GDBITS9(9,10,11,20,21,22,25,26,27),
281         [GTE_NCDT]  = GDBITS9(9,10,11,20,21,22,25,26,27),
282         [GTE_NCCS]  = GDBITS9(9,10,11,20,21,22,25,26,27),
283         [GTE_CC]    = GDBITS9(9,10,11,20,21,22,25,26,27),
284         [GTE_NCS]   = GDBITS9(9,10,11,20,21,22,25,26,27),
285         [GTE_NCT]   = GDBITS9(9,10,11,20,21,22,25,26,27),
286         [GTE_SQR]   = GDBITS6(9,10,11,25,26,27),
287         [GTE_DCPL]  = GDBITS9(9,10,11,20,21,22,25,26,27),
288         [GTE_DPCT]  = GDBITS9(9,10,11,20,21,22,25,26,27),
289         [GTE_AVSZ3] = GDBITS2(7,24),
290         [GTE_AVSZ4] = GDBITS2(7,24),
291         [GTE_RTPT]  = 0x0f0f7f00ll,
292         [GTE_GPF]   = GDBITS9(9,10,11,20,21,22,25,26,27),
293         [GTE_GPL]   = GDBITS9(9,10,11,20,21,22,25,26,27),
294         [GTE_NCCT]  = GDBITS9(9,10,11,20,21,22,25,26,27),
295 };
296
297 static int ari64_init()
298 {
299         static u32 scratch_buf[8*8*2] __attribute__((aligned(64)));
300         extern void (*psxCP2[64])();
301         extern void psxNULL();
302         extern unsigned char *out;
303         size_t i;
304
305         new_dynarec_init();
306         new_dyna_pcsx_mem_init();
307
308         for (i = 0; i < ARRAY_SIZE(gte_handlers); i++)
309                 if (psxCP2[i] != psxNULL)
310                         gte_handlers[i] = psxCP2[i];
311
312 #if defined(__arm__) && !defined(DRC_DBG)
313         gte_handlers[0x06] = gteNCLIP_arm;
314 #ifdef HAVE_ARMV5
315         gte_handlers_nf[0x01] = gteRTPS_nf_arm;
316         gte_handlers_nf[0x30] = gteRTPT_nf_arm;
317 #endif
318 #ifdef __ARM_NEON__
319         // compiler's _nf version is still a lot slower than neon
320         // _nf_arm RTPS is roughly the same, RTPT slower
321         gte_handlers[0x01] = gte_handlers_nf[0x01] = gteRTPS_neon;
322         gte_handlers[0x30] = gte_handlers_nf[0x30] = gteRTPT_neon;
323 #endif
324 #endif
325 #ifdef DRC_DBG
326         memcpy(gte_handlers_nf, gte_handlers, sizeof(gte_handlers_nf));
327 #endif
328         psxH_ptr = psxH;
329         zeromem_ptr = zero_mem;
330         scratch_buf_ptr = scratch_buf;
331
332         SysPrintf("Mapped (RAM/scrp/ROM/LUTs/TC):\n");
333         SysPrintf("%p/%p/%p/%p/%p\n",
334                 psxM, psxH, psxR, mem_rtab, out);
335
336         return 0;
337 }
338
339 static void ari64_reset()
340 {
341         printf("ari64_reset\n");
342         new_dyna_pcsx_mem_reset();
343         invalidate_all_pages();
344         new_dyna_restore();
345         pending_exception = 1;
346 }
347
348 // execute until predefined leave points
349 // (HLE softcall exit and BIOS fastboot end)
350 static void ari64_execute_until()
351 {
352         schedule_timeslice();
353
354         evprintf("ari64_execute %08x, %u->%u (%d)\n", psxRegs.pc,
355                 psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle);
356
357         new_dyna_start(dynarec_local);
358
359         evprintf("ari64_execute end %08x, %u->%u (%d)\n", psxRegs.pc,
360                 psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle);
361 }
362
363 static void ari64_execute()
364 {
365         while (!stop) {
366                 ari64_execute_until();
367                 evprintf("drc left @%08x\n", psxRegs.pc);
368         }
369 }
370
371 static void ari64_clear(u32 addr, u32 size)
372 {
373         u32 start, end, main_ram;
374
375         size *= 4; /* PCSX uses DMA units (words) */
376
377         evprintf("ari64_clear %08x %04x\n", addr, size);
378
379         /* check for RAM mirrors */
380         main_ram = (addr & 0xffe00000) == 0x80000000;
381
382         start = addr >> 12;
383         end = (addr + size) >> 12;
384
385         for (; start <= end; start++)
386                 if (!main_ram || !invalid_code[start])
387                         invalidate_block(start);
388 }
389
390 #ifdef ICACHE_EMULATION
/*
 * R3000Acpu notify hook (only built with ICACHE_EMULATION).
 * Intentionally a no-op for now; both arguments are ignored.
 */
static void ari64_notify(int note, void *data) {
	/*
	To be filled in once the ARM dynarec has proper icache emulation.
	Planned dispatch:
	switch (note)
	{
		case R3000ACPU_NOTIFY_CACHE_UNISOLATED:
			break;
		case R3000ACPU_NOTIFY_CACHE_ISOLATED:
		Sent from psxDma3().
		case R3000ACPU_NOTIFY_DMA3_EXE_LOAD:
		default:
			break;
	}
	*/
}
406 #endif
407
/* R3000Acpu shutdown hook: tear down the dynarec, then its memory layer. */
static void ari64_shutdown()
{
	new_dynarec_cleanup();

	new_dyna_pcsx_mem_shutdown();
}
413
414 R3000Acpu psxRec = {
415         ari64_init,
416         ari64_reset,
417         ari64_execute,
418         ari64_execute_until,
419         ari64_clear,
420 #ifdef ICACHE_EMULATION
421         ari64_notify,
422 #endif
423         ari64_shutdown
424 };
425
426 #else // if DRC_DISABLE
427
428 unsigned int address;
429 int pending_exception, stop;
430 unsigned int next_interupt;
431 int new_dynarec_did_compile;
432 int cycle_multiplier;
433 int cycle_multiplier_override;
434 int new_dynarec_hacks_pergame;
435 int new_dynarec_hacks;
436 void *psxH_ptr;
437 void *zeromem_ptr;
438 u8 zero_mem[0x1000];
439 unsigned char *out;
440 void *mem_rtab;
441 void *scratch_buf_ptr;
442 void new_dynarec_init() {}
443 void new_dyna_start(void *context) {}
444 void new_dynarec_cleanup() {}
445 void new_dynarec_clear_full() {}
446 void invalidate_all_pages() {}
447 void invalidate_block(unsigned int block) {}
448 void new_dyna_pcsx_mem_init(void) {}
449 void new_dyna_pcsx_mem_reset(void) {}
450 void new_dyna_pcsx_mem_load_state(void) {}
451 void new_dyna_pcsx_mem_shutdown(void) {}
452 int  new_dynarec_save_blocks(void *save, int size) { return 0; }
453 void new_dynarec_load_blocks(const void *save, int size) {}
454 #endif
455
456 #ifdef DRC_DBG
457
458 #include <stddef.h>
459 static FILE *f;
460 extern u32 last_io_addr;
461
/*
 * Write `size` bytes starting at `mem` to the file `fname`.
 *
 * If `fname` cannot be opened and it contains a '/', a second attempt is
 * made with just the part after the last '/', i.e. relative to the
 * current directory. If no file can be opened, a message is printed to
 * stderr and nothing is written (this is a best-effort debug helper).
 */
static void dump_mem(const char *fname, void *mem, size_t size)
{
	FILE *f1 = fopen(fname, "wb");

	if (f1 == NULL) {
		/* strrchr() returns NULL when there is no '/', so check before using it */
		const char *slash = strrchr(fname, '/');
		if (slash != NULL)
			f1 = fopen(slash + 1, "wb");
	}
	if (f1 == NULL) {
		fprintf(stderr, "dump_mem: failed to open %s\n", fname);
		return;
	}

	if (fwrite(mem, 1, size, f1) != size)
		fprintf(stderr, "dump_mem: short write to %s\n", fname);
	fclose(f1);
}
470
471 static u32 memcheck_read(u32 a)
472 {
473         if ((a >> 16) == 0x1f80)
474                 // scratchpad/IO
475                 return *(u32 *)(psxH + (a & 0xfffc));
476
477         if ((a >> 16) == 0x1f00)
478                 // parallel
479                 return *(u32 *)(psxP + (a & 0xfffc));
480
481 //      if ((a & ~0xe0600000) < 0x200000)
482         // RAM
483         return *(u32 *)(psxM + (a & 0x1ffffc));
484 }
485
#if 0
/*
 * (disabled) Trace writer: appends one record per call to "tracelog".
 * A record is a sequence of items followed by a 0xff terminator:
 *   <idx> <u32>         register word #idx changed (idx < offsetof(intCycle)/4)
 *   0xfc <ev> <u32>     event_cycles[ev] changed
 *   0xfd <u32>          last_io_addr changed
 *   0xfe <u32>          word at last_io_addr changed
 * Only the low byte of an index is written; multi-byte values use host
 * byte order. do_insn_cmp() consumes this format.
 */
void do_insn_trace(void)
{
	static psxRegisters oldregs;
	static u32 old_io_addr = (u32)-1;
	static u32 old_io_data = 0xbad0c0de;
	static u32 event_cycles_o[PSXINT_COUNT];
	u32 *allregs_p = (void *)&psxRegs;
	u32 *allregs_o = (void *)&oldregs;
	u32 io_data;
	int i;
	u8 byte;

	//last_io_addr = 0x5e2c8;
	if (f == NULL)
		f = fopen("tracelog", "wb");

	// log reg changes
	oldregs.code = psxRegs.code; // don't care
	for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
		if (allregs_p[i] == allregs_o[i])
			continue;
		fwrite(&i, 1, 1, f);
		fwrite(&allregs_p[i], 1, 4, f);
		allregs_o[i] = allregs_p[i];
	}

	// log event changes
	for (i = 0; i < PSXINT_COUNT; i++) {
		if (event_cycles[i] == event_cycles_o[i])
			continue;
		byte = 0xfc;
		fwrite(&byte, 1, 1, f);
		fwrite(&i, 1, 1, f);
		fwrite(&event_cycles[i], 1, 4, f);
		event_cycles_o[i] = event_cycles[i];
	}

	// log last io
	if (old_io_addr != last_io_addr) {
		byte = 0xfd;
		fwrite(&byte, 1, 1, f);
		fwrite(&last_io_addr, 1, 4, f);
		old_io_addr = last_io_addr;
	}
	io_data = memcheck_read(last_io_addr);
	if (old_io_data != io_data) {
		byte = 0xfe;
		fwrite(&byte, 1, 1, f);
		fwrite(&io_data, 1, 4, f);
		old_io_data = io_data;
	}

	// end of record
	byte = 0xff;
	fwrite(&byte, 1, 1, f);

#if 0
	if (psxRegs.cycle == 190230) {
		dump_mem("/mnt/ntz/dev/pnd/tmp/psxram_i.dump", psxM, 0x200000);
		dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs_i.dump", psxH, 0x10000);
		printf("dumped\n");
		exit(1);
	}
#endif
}
#endif
549
550 static const char *regnames[offsetof(psxRegisters, intCycle) / 4] = {
551         "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
552         "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
553         "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
554         "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
555         "lo",  "hi",
556         "C0_0",  "C0_1",  "C0_2",  "C0_3",  "C0_4",  "C0_5",  "C0_6",  "C0_7",
557         "C0_8",  "C0_9",  "C0_10", "C0_11", "C0_12", "C0_13", "C0_14", "C0_15",
558         "C0_16", "C0_17", "C0_18", "C0_19", "C0_20", "C0_21", "C0_22", "C0_23",
559         "C0_24", "C0_25", "C0_26", "C0_27", "C0_28", "C0_29", "C0_30", "C0_31",
560
561         "C2D0",  "C2D1",  "C2D2",  "C2D3",  "C2D4",  "C2D5",  "C2D6",  "C2D7",
562         "C2D8",  "C2D9",  "C2D10", "C2D11", "C2D12", "C2D13", "C2D14", "C2D15",
563         "C2D16", "C2D17", "C2D18", "C2D19", "C2D20", "C2D21", "C2D22", "C2D23",
564         "C2D24", "C2D25", "C2D26", "C2D27", "C2D28", "C2D29", "C2D30", "C2D31",
565
566         "C2C0",  "C2C1",  "C2C2",  "C2C3",  "C2C4",  "C2C5",  "C2C6",  "C2C7",
567         "C2C8",  "C2C9",  "C2C10", "C2C11", "C2C12", "C2C13", "C2C14", "C2C15",
568         "C2C16", "C2C17", "C2C18", "C2C19", "C2C20", "C2C21", "C2C22", "C2C23",
569         "C2C24", "C2C25", "C2C26", "C2C27", "C2C28", "C2C29", "C2C30", "C2C31",
570
571         "PC", "code", "cycle", "interrupt",
572 };
573
574 static struct {
575         int reg;
576         u32 val, val_expect;
577         u32 pc, cycle;
578 } miss_log[64];
579 static int miss_log_i;
580 #define miss_log_len (sizeof(miss_log)/sizeof(miss_log[0]))
581 #define miss_log_mask (miss_log_len-1)
582
583 static void miss_log_add(int reg, u32 val, u32 val_expect, u32 pc, u32 cycle)
584 {
585         miss_log[miss_log_i].reg = reg;
586         miss_log[miss_log_i].val = val;
587         miss_log[miss_log_i].val_expect = val_expect;
588         miss_log[miss_log_i].pc = pc;
589         miss_log[miss_log_i].cycle = cycle;
590         miss_log_i = (miss_log_i + 1) & miss_log_mask;
591 }
592
/* Empty function; its only reference in this file is a commented-out call in do_insn_cmp(). */
void breakme()
{
}
594
595 void do_insn_cmp(void)
596 {
597         static psxRegisters rregs;
598         static u32 mem_addr, mem_val;
599         u32 *allregs_p = (void *)&psxRegs;
600         u32 *allregs_e = (void *)&rregs;
601         static u32 ppc, failcount;
602         int i, ret, bad = 0, which_event = -1;
603         u32 ev_cycles = 0;
604         u8 code;
605
606         if (f == NULL)
607                 f = fopen("tracelog", "rb");
608
609         while (1) {
610                 if ((ret = fread(&code, 1, 1, f)) <= 0)
611                         break;
612                 if (ret <= 0)
613                         break;
614                 if (code == 0xff)
615                         break;
616                 switch (code) {
617                 case 0xfc:
618                         which_event = 0;
619                         fread(&which_event, 1, 1, f);
620                         fread(&ev_cycles, 1, 4, f);
621                         continue;
622                 case 0xfd:
623                         fread(&mem_addr, 1, 4, f);
624                         continue;
625                 case 0xfe:
626                         fread(&mem_val, 1, 4, f);
627                         continue;
628                 }
629                 fread(&allregs_e[code], 1, 4, f);
630         }
631
632         if (ret <= 0) {
633                 printf("EOF?\n");
634                 goto end;
635         }
636
637         psxRegs.code = rregs.code; // don't care
638         psxRegs.cycle = rregs.cycle;
639         psxRegs.CP0.r[9] = rregs.CP0.r[9]; // Count
640
641         //if (psxRegs.cycle == 166172) breakme();
642
643         if (memcmp(&psxRegs, &rregs, offsetof(psxRegisters, intCycle)) == 0 &&
644                         mem_val == memcheck_read(mem_addr)
645            ) {
646                 failcount = 0;
647                 goto ok;
648         }
649
650         for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
651                 if (allregs_p[i] != allregs_e[i]) {
652                         miss_log_add(i, allregs_p[i], allregs_e[i], psxRegs.pc, psxRegs.cycle);
653                         bad++;
654                         if (i > 32+2)
655                                 goto end;
656                 }
657         }
658
659         if (mem_val != memcheck_read(mem_addr)) {
660                 printf("bad mem @%08x: %08x %08x\n", mem_addr, memcheck_read(mem_addr), mem_val);
661                 goto end;
662         }
663
664         if (which_event >= 0 && event_cycles[which_event] != ev_cycles) {
665                 printf("bad ev_cycles #%d: %08x %08x\n", which_event, event_cycles[which_event], ev_cycles);
666                 goto end;
667         }
668
669         if (psxRegs.pc == rregs.pc && bad < 6 && failcount < 32) {
670                 static int last_mcycle;
671                 if (last_mcycle != psxRegs.cycle >> 20) {
672                         printf("%u\n", psxRegs.cycle);
673                         last_mcycle = psxRegs.cycle >> 20;
674                 }
675                 failcount++;
676                 goto ok;
677         }
678
679 end:
680         for (i = 0; i < miss_log_len; i++, miss_log_i = (miss_log_i + 1) & miss_log_mask)
681                 printf("bad %5s: %08x %08x, pc=%08x, cycle %u\n",
682                         regnames[miss_log[miss_log_i].reg], miss_log[miss_log_i].val,
683                         miss_log[miss_log_i].val_expect, miss_log[miss_log_i].pc, miss_log[miss_log_i].cycle);
684         printf("-- %d\n", bad);
685         for (i = 0; i < 8; i++)
686                 printf("r%d=%08x r%2d=%08x r%2d=%08x r%2d=%08x\n", i, allregs_p[i],
687                         i+8, allregs_p[i+8], i+16, allregs_p[i+16], i+24, allregs_p[i+24]);
688         printf("PC: %08x/%08x, cycle %u\n", psxRegs.pc, ppc, psxRegs.cycle);
689         dump_mem("/mnt/ntz/dev/pnd/tmp/psxram.dump", psxM, 0x200000);
690         dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs.dump", psxH, 0x10000);
691         exit(1);
692 ok:
693         psxRegs.cycle = rregs.cycle + 2; // sync timing
694         ppc = psxRegs.pc;
695 }
696
697 #endif