Icache emulation from PCSX Redux + Senquack changes from PCSX4ALL (#198)
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / emu_if.c
1 /*
2  * (C) Gražvydas "notaz" Ignotas, 2010-2011
3  *
4  * This work is licensed under the terms of GNU GPL version 2 or later.
5  * See the COPYING file in the top-level directory.
6  */
7
8 #include <stdio.h>
9
10 #include "emu_if.h"
11 #include "pcsxmem.h"
12 #include "../psxhle.h"
13 #include "../r3000a.h"
14 #include "../cdrom.h"
15 #include "../psxdma.h"
16 #include "../mdec.h"
17 #include "../gte_arm.h"
18 #include "../gte_neon.h"
19 #define FLAGLESS
20 #include "../gte.h"
21
/*
 * Number of elements in a true array (not a pointer).  The argument is
 * parenthesized before indexing so expression arguments expand correctly.
 */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

/*
 * Event tracing.  Enable the printf mapping below to get output; otherwise
 * every evprintf() call expands to nothing.
 */
//#define evprintf printf
#define evprintf(...)
26
/* One flag per 4KB page, indexed by (address >> 12); consulted by ari64_clear(). */
char invalid_code[0x100000];
/* 64-byte aligned buffer published through scratch_buf_ptr in ari64_init(). */
static u32 scratch_buf[8*8*2] __attribute__((aligned(64)));
/* Absolute cycle count at which each PSXINT_* event becomes due. */
u32 event_cycles[PSXINT_COUNT];
30
31 static void schedule_timeslice(void)
32 {
33         u32 i, c = psxRegs.cycle;
34         u32 irqs = psxRegs.interrupt;
35         s32 min, dif;
36
37         min = PSXCLK;
38         for (i = 0; irqs != 0; i++, irqs >>= 1) {
39                 if (!(irqs & 1))
40                         continue;
41                 dif = event_cycles[i] - c;
42                 //evprintf("  ev %d\n", dif);
43                 if (0 < dif && dif < min)
44                         min = dif;
45         }
46         next_interupt = c + min;
47 }
48
/* Signature of an event handler as called from irq_test(). */
typedef void (irq_func)();

/*
 * Handler for each PSXINT_* event, indexed by event number.  irq_test()
 * calls the entry whose event_cycles[] deadline has been reached.  Any
 * index not listed here is left NULL by the designated initializers.
 */
static irq_func * const irq_funcs[] = {
        [PSXINT_SIO]    = sioInterrupt,
        [PSXINT_CDR]    = cdrInterrupt,
        [PSXINT_CDREAD] = cdrReadInterrupt,
        [PSXINT_GPUDMA] = gpuInterrupt,
        [PSXINT_MDECOUTDMA] = mdec1Interrupt,
        [PSXINT_SPUDMA] = spuInterrupt,
        [PSXINT_MDECINDMA] = mdec0Interrupt,
        [PSXINT_GPUOTCDMA] = gpuotcInterrupt,
        [PSXINT_CDRDMA] = cdrDmaInterrupt,
        [PSXINT_CDRLID] = cdrLidSeekInterrupt,
        [PSXINT_CDRPLAY] = cdrPlayInterrupt,
        [PSXINT_SPU_UPDATE] = spuUpdate,
        [PSXINT_RCNT] = psxRcntUpdate,
};
66
67 /* local dupe of psxBranchTest, using event_cycles */
68 static void irq_test(void)
69 {
70         u32 irqs = psxRegs.interrupt;
71         u32 cycle = psxRegs.cycle;
72         u32 irq, irq_bits;
73
74         // irq_funcs() may queue more irqs
75         psxRegs.interrupt = 0;
76
77         for (irq = 0, irq_bits = irqs; irq_bits != 0; irq++, irq_bits >>= 1) {
78                 if (!(irq_bits & 1))
79                         continue;
80                 if ((s32)(cycle - event_cycles[irq]) >= 0) {
81                         irqs &= ~(1 << irq);
82                         irq_funcs[irq]();
83                 }
84         }
85         psxRegs.interrupt |= irqs;
86
87         if ((psxHu32(0x1070) & psxHu32(0x1074)) && (Status & 0x401) == 0x401) {
88                 psxException(0x400, 0);
89                 pending_exception = 1;
90         }
91 }
92
93 void gen_interupt()
94 {
95         evprintf("  +ge %08x, %u->%u\n", psxRegs.pc, psxRegs.cycle, next_interupt);
96
97         irq_test();
98         //psxBranchTest();
99         //pending_exception = 1;
100
101         schedule_timeslice();
102
103         evprintf("  -ge %08x, %u->%u (%d)\n", psxRegs.pc, psxRegs.cycle,
104                 next_interupt, next_interupt - psxRegs.cycle);
105 }
106
107 // from interpreter
108 extern void MTC0(int reg, u32 val);
109
110 void pcsx_mtc0(u32 reg, u32 val)
111 {
112         evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
113         MTC0(reg, val);
114         gen_interupt();
115         if (Cause & Status & 0x0300) // possible sw irq
116                 pending_exception = 1;
117 }
118
119 void pcsx_mtc0_ds(u32 reg, u32 val)
120 {
121         evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
122         MTC0(reg, val);
123 }
124
125 void new_dyna_before_save(void)
126 {
127         psxRegs.interrupt &= ~(1 << PSXINT_RCNT); // old savestate compat
128
129         // psxRegs.intCycle is always maintained, no need to convert
130 }
131
132 void new_dyna_after_save(void)
133 {
134         psxRegs.interrupt |= 1 << PSXINT_RCNT;
135 }
136
137 static void new_dyna_restore(void)
138 {
139         int i;
140         for (i = 0; i < PSXINT_COUNT; i++)
141                 event_cycles[i] = psxRegs.intCycle[i].sCycle + psxRegs.intCycle[i].cycle;
142
143         event_cycles[PSXINT_RCNT] = psxNextsCounter + psxNextCounter;
144         psxRegs.interrupt |=  1 << PSXINT_RCNT;
145         psxRegs.interrupt &= (1 << PSXINT_COUNT) - 1;
146
147         new_dyna_pcsx_mem_load_state();
148 }
149
150 void new_dyna_freeze(void *f, int mode)
151 {
152         const char header_save[8] = "ariblks";
153         uint32_t addrs[1024 * 4];
154         int32_t size = 0;
155         int bytes;
156         char header[8];
157
158         if (mode != 0) { // save
159                 size = new_dynarec_save_blocks(addrs, sizeof(addrs));
160                 if (size == 0)
161                         return;
162
163                 SaveFuncs.write(f, header_save, sizeof(header_save));
164                 SaveFuncs.write(f, &size, sizeof(size));
165                 SaveFuncs.write(f, addrs, size);
166         }
167         else {
168                 new_dyna_restore();
169
170                 bytes = SaveFuncs.read(f, header, sizeof(header));
171                 if (bytes != sizeof(header) || strcmp(header, header_save)) {
172                         if (bytes > 0)
173                                 SaveFuncs.seek(f, -bytes, SEEK_CUR);
174                         return;
175                 }
176                 SaveFuncs.read(f, &size, sizeof(size));
177                 if (size <= 0)
178                         return;
179                 if (size > sizeof(addrs)) {
180                         bytes = size - sizeof(addrs);
181                         SaveFuncs.seek(f, bytes, SEEK_CUR);
182                         size = sizeof(addrs);
183                 }
184                 bytes = SaveFuncs.read(f, addrs, size);
185                 if (bytes != size)
186                         return;
187
188                 new_dynarec_load_blocks(addrs, size);
189         }
190
191         //printf("drc: %d block info entries %s\n", size/8, mode ? "saved" : "loaded");
192 }
193
194 /* GTE stuff */
195 void *gte_handlers[64];
196
197 void *gte_handlers_nf[64] = {
198         NULL      , gteRTPS_nf , NULL       , NULL      , NULL     , NULL       , gteNCLIP_nf, NULL      , // 00
199         NULL      , NULL       , NULL       , NULL      , gteOP_nf , NULL       , NULL       , NULL      , // 08
200         gteDPCS_nf, gteINTPL_nf, gteMVMVA_nf, gteNCDS_nf, gteCDP_nf, NULL       , gteNCDT_nf , NULL      , // 10
201         NULL      , NULL       , NULL       , gteNCCS_nf, gteCC_nf , NULL       , gteNCS_nf  , NULL      , // 18
202         gteNCT_nf , NULL       , NULL       , NULL      , NULL     , NULL       , NULL       , NULL      , // 20
203         gteSQR_nf , gteDCPL_nf , gteDPCT_nf , NULL      , NULL     , gteAVSZ3_nf, gteAVSZ4_nf, NULL      , // 28 
204         gteRTPT_nf, NULL       , NULL       , NULL      , NULL     , NULL       , NULL       , NULL      , // 30
205         NULL      , NULL       , NULL       , NULL      , NULL     , gteGPF_nf  , gteGPL_nf  , gteNCCT_nf, // 38
206 };
207
/* Mnemonic for each GTE op number; ops without a name stay NULL. */
const char *gte_regnames[64] = {
        [0x01] = "RTPS",
        [0x06] = "NCLIP",
        [0x0c] = "OP",
        [0x10] = "DPCS",
        [0x11] = "INTPL",
        [0x12] = "MVMVA",
        [0x13] = "NCDS",
        [0x14] = "CDP",
        [0x16] = "NCDT",
        [0x1b] = "NCCS",
        [0x1c] = "CC",
        [0x1e] = "NCS",
        [0x20] = "NCT",
        [0x28] = "SQR",
        [0x29] = "DCPL",
        [0x2a] = "DPCT",
        [0x2d] = "AVSZ3",
        [0x2e] = "AVSZ4",
        [0x30] = "RTPT",
        [0x3d] = "GPF",
        [0x3e] = "GPL",
        [0x3f] = "NCCT",
};
218
/* from gte.txt.. not sure if this is any good. */
/* Cycle count per GTE op number; ops not listed are 0. */
const char gte_cycletab[64] = {
        [0x01] = 15, [0x06] =  8, [0x0c] =  6,
        [0x10] =  8, [0x11] =  8, [0x12] =  8, [0x13] = 19, [0x14] = 13,
        [0x16] = 44, [0x1b] = 17, [0x1c] = 11, [0x1e] = 14,
        [0x20] = 30,
        [0x28] =  5, [0x29] =  8, [0x2a] = 17, [0x2d] =  5, [0x2e] =  6,
        [0x30] = 23,
        [0x3d] =  5, [0x3e] =  5, [0x3f] = 39,
};
227
/*
 * Bit-mask builders for the 64-bit GTE register masks below.
 *   GDBIT(n)  - bit n          (low word)
 *   GCBIT(n)  - bit 32 + n     (high word)
 *   G?BITSk() - OR of k such bits
 * Arguments are parenthesized and the shifted constant is unsigned, so
 * expression arguments expand correctly and bit 63 can be formed without
 * overflowing a signed type.
 */
#define GCBIT(x) \
        (1ull << (32 + (x)))
#define GDBIT(x) \
        (1ull << (x))
#define GCBITS3(b0,b1,b2) \
        (GCBIT(b0) | GCBIT(b1) | GCBIT(b2))
#define GDBITS2(b0,b1) \
        (GDBIT(b0) | GDBIT(b1))
#define GDBITS3(b0,b1,b2) \
        (GDBITS2(b0,b1) | GDBIT(b2))
#define GDBITS4(b0,b1,b2,b3) \
        (GDBITS3(b0,b1,b2) | GDBIT(b3))
#define GDBITS5(b0,b1,b2,b3,b4) \
        (GDBITS4(b0,b1,b2,b3) | GDBIT(b4))
#define GDBITS6(b0,b1,b2,b3,b4,b5) \
        (GDBITS5(b0,b1,b2,b3,b4) | GDBIT(b5))
#define GDBITS7(b0,b1,b2,b3,b4,b5,b6) \
        (GDBITS6(b0,b1,b2,b3,b4,b5) | GDBIT(b6))
#define GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) \
        (GDBITS7(b0,b1,b2,b3,b4,b5,b6) | GDBIT(b7))
#define GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) \
        (GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) | GDBIT(b8))
#define GDBITS10(b0,b1,b2,b3,b4,b5,b6,b7,b8,b9) \
        (GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) | GDBIT(b9))
252
/*
 * Per-op register mask, indexed by GTE op number.  GDBIT(n) sets bit n and
 * GCBIT(n) sets bit 32+n; the literal constants fill in runs of high-word
 * bits directly.
 * NOTE(review): going by the names, the low word is presumably the GTE data
 * registers and the high word the control registers that the op reads -
 * confirm against the consumer of this table.
 */
const uint64_t gte_reg_reads[64] = {
        [GTE_RTPS]  = 0x1f0000ff00000000ll | GDBITS7(0,1,13,14,17,18,19),
        [GTE_NCLIP] =                        GDBITS3(12,13,14),
        [GTE_OP]    = GCBITS3(0,2,4)       | GDBITS3(9,10,11),
        [GTE_DPCS]  = GCBITS3(21,22,23)    | GDBITS4(6,8,21,22),
        [GTE_INTPL] = GCBITS3(21,22,23)    | GDBITS7(6,8,9,10,11,21,22),
        [GTE_MVMVA] = 0x00ffffff00000000ll | GDBITS9(0,1,2,3,4,5,9,10,11), // XXX: maybe decode further?
        [GTE_NCDS]  = 0x00ffff0000000000ll | GDBITS6(0,1,6,8,21,22),
        [GTE_CDP]   = 0x00ffe00000000000ll | GDBITS7(6,8,9,10,11,21,22),
        [GTE_NCDT]  = 0x00ffff0000000000ll | GDBITS8(0,1,2,3,4,5,6,8),
        [GTE_NCCS]  = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22),
        [GTE_CC]    = 0x001fe00000000000ll | GDBITS6(6,9,10,11,21,22),
        [GTE_NCS]   = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22),
        [GTE_NCT]   = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
        [GTE_SQR]   =                        GDBITS3(9,10,11),
        [GTE_DCPL]  = GCBITS3(21,22,23)    | GDBITS7(6,8,9,10,11,21,22),
        [GTE_DPCT]  = GCBITS3(21,22,23)    | GDBITS4(8,20,21,22),
        [GTE_AVSZ3] = GCBIT(29)            | GDBITS3(17,18,19),
        [GTE_AVSZ4] = GCBIT(30)            | GDBITS4(16,17,18,19),
        [GTE_RTPT]  = 0x1f0000ff00000000ll | GDBITS7(0,1,2,3,4,5,19),
        [GTE_GPF]   =                        GDBITS7(6,8,9,10,11,21,22),
        [GTE_GPL]   =                        GDBITS10(6,8,9,10,11,21,22,25,26,27),
        [GTE_NCCT]  = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
};
277
278 // note: this excludes gteFLAG that is always written to
279 const uint64_t gte_reg_writes[64] = {
280         [GTE_RTPS]  = 0x0f0f7f00ll,
281         [GTE_NCLIP] = GDBIT(24),
282         [GTE_OP]    = GDBITS6(9,10,11,25,26,27),
283         [GTE_DPCS]  = GDBITS9(9,10,11,20,21,22,25,26,27),
284         [GTE_INTPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
285         [GTE_MVMVA] = GDBITS6(9,10,11,25,26,27),
286         [GTE_NCDS]  = GDBITS9(9,10,11,20,21,22,25,26,27),
287         [GTE_CDP]   = GDBITS9(9,10,11,20,21,22,25,26,27),
288         [GTE_NCDT]  = GDBITS9(9,10,11,20,21,22,25,26,27),
289         [GTE_NCCS]  = GDBITS9(9,10,11,20,21,22,25,26,27),
290         [GTE_CC]    = GDBITS9(9,10,11,20,21,22,25,26,27),
291         [GTE_NCS]   = GDBITS9(9,10,11,20,21,22,25,26,27),
292         [GTE_NCT]   = GDBITS9(9,10,11,20,21,22,25,26,27),
293         [GTE_SQR]   = GDBITS6(9,10,11,25,26,27),
294         [GTE_DCPL]  = GDBITS9(9,10,11,20,21,22,25,26,27),
295         [GTE_DPCT]  = GDBITS9(9,10,11,20,21,22,25,26,27),
296         [GTE_AVSZ3] = GDBITS2(7,24),
297         [GTE_AVSZ4] = GDBITS2(7,24),
298         [GTE_RTPT]  = 0x0f0f7f00ll,
299         [GTE_GPF]   = GDBITS9(9,10,11,20,21,22,25,26,27),
300         [GTE_GPL]   = GDBITS9(9,10,11,20,21,22,25,26,27),
301         [GTE_NCCT]  = GDBITS9(9,10,11,20,21,22,25,26,27),
302 };
303
304 static int ari64_init()
305 {
306         extern void (*psxCP2[64])();
307         extern void psxNULL();
308         extern unsigned char *out;
309         size_t i;
310
311         new_dynarec_init();
312         new_dyna_pcsx_mem_init();
313
314         for (i = 0; i < ARRAY_SIZE(gte_handlers); i++)
315                 if (psxCP2[i] != psxNULL)
316                         gte_handlers[i] = psxCP2[i];
317
318 #if defined(__arm__) && !defined(DRC_DBG)
319         gte_handlers[0x06] = gteNCLIP_arm;
320 #ifdef HAVE_ARMV5
321         gte_handlers_nf[0x01] = gteRTPS_nf_arm;
322         gte_handlers_nf[0x30] = gteRTPT_nf_arm;
323 #endif
324 #ifdef __ARM_NEON__
325         // compiler's _nf version is still a lot slower than neon
326         // _nf_arm RTPS is roughly the same, RTPT slower
327         gte_handlers[0x01] = gte_handlers_nf[0x01] = gteRTPS_neon;
328         gte_handlers[0x30] = gte_handlers_nf[0x30] = gteRTPT_neon;
329 #endif
330 #endif
331 #ifdef DRC_DBG
332         memcpy(gte_handlers_nf, gte_handlers, sizeof(gte_handlers_nf));
333 #endif
334         psxH_ptr = psxH;
335         zeromem_ptr = zero_mem;
336         scratch_buf_ptr = scratch_buf;
337
338         SysPrintf("Mapped (RAM/scrp/ROM/LUTs/TC):\n");
339         SysPrintf("%08x/%08x/%08x/%08x/%08x\n",
340                 psxM, psxH, psxR, mem_rtab, out);
341
342         return 0;
343 }
344
345 static void ari64_reset()
346 {
347         printf("ari64_reset\n");
348         new_dyna_pcsx_mem_reset();
349         invalidate_all_pages();
350         new_dyna_restore();
351         pending_exception = 1;
352 }
353
354 // execute until predefined leave points
355 // (HLE softcall exit and BIOS fastboot end)
356 static void ari64_execute_until()
357 {
358         schedule_timeslice();
359
360         evprintf("ari64_execute %08x, %u->%u (%d)\n", psxRegs.pc,
361                 psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle);
362
363         new_dyna_start();
364
365         evprintf("ari64_execute end %08x, %u->%u (%d)\n", psxRegs.pc,
366                 psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle);
367 }
368
369 static void ari64_execute()
370 {
371         while (!stop) {
372                 ari64_execute_until();
373                 evprintf("drc left @%08x\n", psxRegs.pc);
374         }
375 }
376
377 static void ari64_clear(u32 addr, u32 size)
378 {
379         u32 start, end, main_ram;
380
381         size *= 4; /* PCSX uses DMA units (words) */
382
383         evprintf("ari64_clear %08x %04x\n", addr, size);
384
385         /* check for RAM mirrors */
386         main_ram = (addr & 0xffe00000) == 0x80000000;
387
388         start = addr >> 12;
389         end = (addr + size) >> 12;
390
391         for (; start <= end; start++)
392                 if (!main_ram || !invalid_code[start])
393                         invalidate_block(start);
394 }
395
#ifdef ICACHE_EMULATION
/*
 * CPU notification hook.  Deliberately does nothing for now.
 *
 * Should be fixed when ARM dynarec has proper icache emulation.
 * Notifications that will then need handling:
 *   R3000ACPU_NOTIFY_CACHE_UNISOLATED
 *   R3000ACPU_NOTIFY_CACHE_ISOLATED
 *   R3000ACPU_NOTIFY_DMA3_EXE_LOAD   (sent from psxDma3())
 */
static void ari64_notify(int note, void *data)
{
        (void)note;
        (void)data;
}
#endif
413
/* Tear down the backend: the recompiler first, then its memory layer. */
static void ari64_shutdown()
{
        new_dynarec_cleanup();
        new_dyna_pcsx_mem_shutdown();
}
419
/* Interpreter entry points, used in psxRec below when DRC_DISABLE is set. */
extern void intExecute();
extern void intExecuteT();
extern void intExecuteBlock();
extern void intExecuteBlockT();
/* Without DRC_DBG the *T names simply alias the plain interpreter functions. */
#ifndef DRC_DBG
#define intExecuteT intExecute
#define intExecuteBlockT intExecuteBlock
#endif

/*
 * CPU core descriptor for this backend.  Entries, in order: init, reset,
 * execute, execute-until-leave-point, clear, (notify when ICACHE_EMULATION
 * is defined), shutdown.  With DRC_DISABLE the two execute entries come
 * from the interpreter instead of the dynarec.
 */
R3000Acpu psxRec = {
        ari64_init,
        ari64_reset,
#ifndef DRC_DISABLE
        ari64_execute,
        ari64_execute_until,
#else
        intExecuteT,
        intExecuteBlockT,
#endif
        ari64_clear,
#ifdef ICACHE_EMULATION
        ari64_notify,
#endif
        ari64_shutdown
};
445
// TODO: rm
#ifndef DRC_DBG
/* No-op stand-ins for the DRC_DBG trace/compare hooks. */
void do_insn_trace()
{
}

void do_insn_cmp()
{
}
#endif
451
/*
 * DRC_DISABLE build: definitions of the variables and functions this file
 * references from the dynarec, all empty or zero.
 * NOTE(review): presumably these exist so the file still links when the
 * recompiler is not built - confirm against the build configuration.
 */
#ifdef DRC_DISABLE
unsigned int address;
int pending_exception, stop;
unsigned int next_interupt;
int new_dynarec_did_compile;
int cycle_multiplier;
int new_dynarec_hacks;
void *psxH_ptr;
void *zeromem_ptr;
u8 zero_mem[0x1000];
unsigned char *out;
void *mem_rtab;
void *scratch_buf_ptr;
/* references ari64_execute, which psxRec does not use in this configuration */
void new_dynarec_init() { (void)ari64_execute; }
void new_dyna_start() {}
void new_dynarec_cleanup() {}
void new_dynarec_clear_full() {}
void invalidate_all_pages() {}
void invalidate_block(unsigned int block) {}
void new_dyna_pcsx_mem_init(void) {}
void new_dyna_pcsx_mem_reset(void) {}
void new_dyna_pcsx_mem_load_state(void) {}
void new_dyna_pcsx_mem_shutdown(void) {}
/* reports 0 bytes, so new_dyna_freeze() writes no block chunk */
int  new_dynarec_save_blocks(void *save, int size) { return 0; }
void new_dynarec_load_blocks(const void *save, int size) {}
#endif
478
479 #ifdef DRC_DBG
480
481 #include <stddef.h>
/* "tracelog" stream, opened lazily by do_insn_trace() (write) or do_insn_cmp() (read). */
static FILE *f;
/* Address whose memory contents are logged/compared with each trace record. */
extern u32 last_io_addr;
484
/*
 * Write `size` bytes starting at `mem` to the file `fname`.
 *
 * If `fname` cannot be opened, the part after its last '/' is tried
 * instead, i.e. the same file name in the current directory.  If that
 * fails too, or the name has no '/', a message is printed and nothing is
 * written.
 */
static void dump_mem(const char *fname, void *mem, size_t size)
{
        const char *base;
        FILE *f1 = fopen(fname, "wb");

        if (f1 == NULL) {
                base = strrchr(fname, '/');
                if (base != NULL)
                        f1 = fopen(base + 1, "wb");
        }
        if (f1 == NULL) {
                printf("dump_mem: can't open %s\n", fname);
                return;
        }

        if (fwrite(mem, 1, size, f1) != size)
                printf("dump_mem: short write to %s\n", fname);
        fclose(f1);
}
493
494 static u32 memcheck_read(u32 a)
495 {
496         if ((a >> 16) == 0x1f80)
497                 // scratchpad/IO
498                 return *(u32 *)(psxH + (a & 0xfffc));
499
500         if ((a >> 16) == 0x1f00)
501                 // parallel
502                 return *(u32 *)(psxP + (a & 0xfffc));
503
504 //      if ((a & ~0xe0600000) < 0x200000)
505         // RAM
506         return *(u32 *)(psxM + (a & 0x1ffffc));
507 }
508
509 void do_insn_trace(void)
510 {
511         static psxRegisters oldregs;
512         static u32 old_io_addr = (u32)-1;
513         static u32 old_io_data = 0xbad0c0de;
514         static u32 event_cycles_o[PSXINT_COUNT];
515         u32 *allregs_p = (void *)&psxRegs;
516         u32 *allregs_o = (void *)&oldregs;
517         u32 io_data;
518         int i;
519         u8 byte;
520
521         //last_io_addr = 0x5e2c8;
522         if (f == NULL)
523                 f = fopen("tracelog", "wb");
524
525         // log reg changes
526         oldregs.code = psxRegs.code; // don't care
527         for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
528                 if (allregs_p[i] != allregs_o[i]) {
529                         fwrite(&i, 1, 1, f);
530                         fwrite(&allregs_p[i], 1, 4, f);
531                         allregs_o[i] = allregs_p[i];
532                 }
533         }
534         // log event changes
535         for (i = 0; i < PSXINT_COUNT; i++) {
536                 if (event_cycles[i] != event_cycles_o[i]) {
537                         byte = 0xfc;
538                         fwrite(&byte, 1, 1, f);
539                         fwrite(&i, 1, 1, f);
540                         fwrite(&event_cycles[i], 1, 4, f);
541                         event_cycles_o[i] = event_cycles[i];
542                 }
543         }
544         // log last io
545         if (old_io_addr != last_io_addr) {
546                 byte = 0xfd;
547                 fwrite(&byte, 1, 1, f);
548                 fwrite(&last_io_addr, 1, 4, f);
549                 old_io_addr = last_io_addr;
550         }
551         io_data = memcheck_read(last_io_addr);
552         if (old_io_data != io_data) {
553                 byte = 0xfe;
554                 fwrite(&byte, 1, 1, f);
555                 fwrite(&io_data, 1, 4, f);
556                 old_io_data = io_data;
557         }
558         byte = 0xff;
559         fwrite(&byte, 1, 1, f);
560
561 #if 0
562         if (psxRegs.cycle == 190230) {
563                 dump_mem("/mnt/ntz/dev/pnd/tmp/psxram_i.dump", psxM, 0x200000);
564                 dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs_i.dump", psxH, 0x10000);
565                 printf("dumped\n");
566                 exit(1);
567         }
568 #endif
569 }
570
/*
 * Printable name for each 32-bit word of psxRegisters that precedes
 * intCycle, in layout order; used by do_insn_cmp() when reporting
 * mismatches.
 */
static const char *regnames[offsetof(psxRegisters, intCycle) / 4] = {
        "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
        "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
        "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
        "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
        "lo",  "hi",
        "C0_0",  "C0_1",  "C0_2",  "C0_3",  "C0_4",  "C0_5",  "C0_6",  "C0_7",
        "C0_8",  "C0_9",  "C0_10", "C0_11", "C0_12", "C0_13", "C0_14", "C0_15",
        "C0_16", "C0_17", "C0_18", "C0_19", "C0_20", "C0_21", "C0_22", "C0_23",
        "C0_24", "C0_25", "C0_26", "C0_27", "C0_28", "C0_29", "C0_30", "C0_31",

        "C2D0",  "C2D1",  "C2D2",  "C2D3",  "C2D4",  "C2D5",  "C2D6",  "C2D7",
        "C2D8",  "C2D9",  "C2D10", "C2D11", "C2D12", "C2D13", "C2D14", "C2D15",
        "C2D16", "C2D17", "C2D18", "C2D19", "C2D20", "C2D21", "C2D22", "C2D23",
        "C2D24", "C2D25", "C2D26", "C2D27", "C2D28", "C2D29", "C2D30", "C2D31",

        "C2C0",  "C2C1",  "C2C2",  "C2C3",  "C2C4",  "C2C5",  "C2C6",  "C2C7",
        "C2C8",  "C2C9",  "C2C10", "C2C11", "C2C12", "C2C13", "C2C14", "C2C15",
        "C2C16", "C2C17", "C2C18", "C2C19", "C2C20", "C2C21", "C2C22", "C2C23",
        "C2C24", "C2C25", "C2C26", "C2C27", "C2C28", "C2C29", "C2C30", "C2C31",

        "PC", "code", "cycle", "interrupt",
};
594
/*
 * Ring buffer holding the most recent register mismatches seen by
 * do_insn_cmp(); it is printed in full when the comparison gives up.
 */
static struct {
        int reg;              /* index into regnames[] */
        u32 val, val_expect;  /* value found / value expected from the trace */
        u32 pc, cycle;        /* psxRegs.pc and psxRegs.cycle at the mismatch */
} miss_log[64];
/* next slot to be written */
static int miss_log_i;
#define miss_log_len (sizeof(miss_log)/sizeof(miss_log[0]))
/* miss_log_len is a power of two, so the index wraps with a mask */
#define miss_log_mask (miss_log_len-1)
603
604 static void miss_log_add(int reg, u32 val, u32 val_expect, u32 pc, u32 cycle)
605 {
606         miss_log[miss_log_i].reg = reg;
607         miss_log[miss_log_i].val = val;
608         miss_log[miss_log_i].val_expect = val_expect;
609         miss_log[miss_log_i].pc = pc;
610         miss_log[miss_log_i].cycle = cycle;
611         miss_log_i = (miss_log_i + 1) & miss_log_mask;
612 }
613
/* Empty function, handy as a debugger breakpoint target (see do_insn_cmp). */
void breakme()
{
}
615
616 void do_insn_cmp(void)
617 {
618         static psxRegisters rregs;
619         static u32 mem_addr, mem_val;
620         u32 *allregs_p = (void *)&psxRegs;
621         u32 *allregs_e = (void *)&rregs;
622         static u32 ppc, failcount;
623         int i, ret, bad = 0, which_event = -1;
624         u32 ev_cycles = 0;
625         u8 code;
626
627         if (f == NULL)
628                 f = fopen("tracelog", "rb");
629
630         while (1) {
631                 if ((ret = fread(&code, 1, 1, f)) <= 0)
632                         break;
633                 if (ret <= 0)
634                         break;
635                 if (code == 0xff)
636                         break;
637                 switch (code) {
638                 case 0xfc:
639                         which_event = 0;
640                         fread(&which_event, 1, 1, f);
641                         fread(&ev_cycles, 1, 4, f);
642                         continue;
643                 case 0xfd:
644                         fread(&mem_addr, 1, 4, f);
645                         continue;
646                 case 0xfe:
647                         fread(&mem_val, 1, 4, f);
648                         continue;
649                 }
650                 fread(&allregs_e[code], 1, 4, f);
651         }
652
653         if (ret <= 0) {
654                 printf("EOF?\n");
655                 goto end;
656         }
657
658         psxRegs.code = rregs.code; // don't care
659         psxRegs.cycle = rregs.cycle;
660         psxRegs.CP0.r[9] = rregs.CP0.r[9]; // Count
661
662         //if (psxRegs.cycle == 166172) breakme();
663
664         if (memcmp(&psxRegs, &rregs, offsetof(psxRegisters, intCycle)) == 0 &&
665                         mem_val == memcheck_read(mem_addr)
666            ) {
667                 failcount = 0;
668                 goto ok;
669         }
670
671         for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
672                 if (allregs_p[i] != allregs_e[i]) {
673                         miss_log_add(i, allregs_p[i], allregs_e[i], psxRegs.pc, psxRegs.cycle);
674                         bad++;
675                 }
676         }
677
678         if (mem_val != memcheck_read(mem_addr)) {
679                 printf("bad mem @%08x: %08x %08x\n", mem_addr, memcheck_read(mem_addr), mem_val);
680                 goto end;
681         }
682
683         if (which_event >= 0 && event_cycles[which_event] != ev_cycles) {
684                 printf("bad ev_cycles #%d: %08x %08x\n", which_event, event_cycles[which_event], ev_cycles);
685                 goto end;
686         }
687
688         if (psxRegs.pc == rregs.pc && bad < 6 && failcount < 32) {
689                 static int last_mcycle;
690                 if (last_mcycle != psxRegs.cycle >> 20) {
691                         printf("%u\n", psxRegs.cycle);
692                         last_mcycle = psxRegs.cycle >> 20;
693                 }
694                 failcount++;
695                 goto ok;
696         }
697
698 end:
699         for (i = 0; i < miss_log_len; i++, miss_log_i = (miss_log_i + 1) & miss_log_mask)
700                 printf("bad %5s: %08x %08x, pc=%08x, cycle %u\n",
701                         regnames[miss_log[miss_log_i].reg], miss_log[miss_log_i].val,
702                         miss_log[miss_log_i].val_expect, miss_log[miss_log_i].pc, miss_log[miss_log_i].cycle);
703         printf("-- %d\n", bad);
704         for (i = 0; i < 8; i++)
705                 printf("r%d=%08x r%2d=%08x r%2d=%08x r%2d=%08x\n", i, allregs_p[i],
706                         i+8, allregs_p[i+8], i+16, allregs_p[i+16], i+24, allregs_p[i+24]);
707         printf("PC: %08x/%08x, cycle %u\n", psxRegs.pc, ppc, psxRegs.cycle);
708         dump_mem("/mnt/ntz/dev/pnd/tmp/psxram.dump", psxM, 0x200000);
709         dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs.dump", psxH, 0x10000);
710         exit(1);
711 ok:
712         psxRegs.cycle = rregs.cycle + 2; // sync timing
713         ppc = psxRegs.pc;
714 }
715
716 #endif