pcnt: measure gte too
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / emu_if.c
1 /*
2  * (C) Gražvydas "notaz" Ignotas, 2010-2011
3  *
4  * This work is licensed under the terms of GNU GPL version 2 or later.
5  * See the COPYING file in the top-level directory.
6  */
7
8 #include <stdio.h>
9
10 #include "emu_if.h"
11 #include "pcsxmem.h"
12 #include "../psxhle.h"
13 #include "../r3000a.h"
14 #include "../cdrom.h"
15 #include "../psxdma.h"
16 #include "../mdec.h"
17 #include "../gte_arm.h"
18 #include "../gte_neon.h"
19 #define FLAGLESS
20 #include "../gte.h"
21
/* Element count of a true array (not a pointer); the argument is fully parenthesized. */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
23
24 //#define evprintf printf
25 #define evprintf(...)
26
27 char invalid_code[0x100000];
28 u32 event_cycles[PSXINT_COUNT];
29
30 static void schedule_timeslice(void)
31 {
32         u32 i, c = psxRegs.cycle;
33         s32 min, dif;
34
35         min = psxNextsCounter + psxNextCounter - c;
36         for (i = 0; i < ARRAY_SIZE(event_cycles); i++) {
37                 dif = event_cycles[i] - c;
38                 //evprintf("  ev %d\n", dif);
39                 if (0 < dif && dif < min)
40                         min = dif;
41         }
42         next_interupt = c + min;
43
44 #if 0
45         static u32 cnt, last_cycle;
46         static u64 sum;
47         if (last_cycle) {
48                 cnt++;
49                 sum += psxRegs.cycle - last_cycle;
50                 if ((cnt & 0xff) == 0)
51                         printf("%u\n", (u32)(sum / cnt));
52         }
53         last_cycle = psxRegs.cycle;
54 #endif
55 }
56
57 typedef void (irq_func)();
58
59 static irq_func * const irq_funcs[] = {
60         [PSXINT_SIO]    = sioInterrupt,
61         [PSXINT_CDR]    = cdrInterrupt,
62         [PSXINT_CDREAD] = cdrReadInterrupt,
63         [PSXINT_GPUDMA] = gpuInterrupt,
64         [PSXINT_MDECOUTDMA] = mdec1Interrupt,
65         [PSXINT_SPUDMA] = spuInterrupt,
66         [PSXINT_MDECINDMA] = mdec0Interrupt,
67         [PSXINT_GPUOTCDMA] = gpuotcInterrupt,
68         [PSXINT_CDRDMA] = cdrDmaInterrupt,
69         [PSXINT_CDRLID] = cdrLidSeekInterrupt,
70         [PSXINT_CDRPLAY] = cdrPlayInterrupt,
71 };
72
73 /* local dupe of psxBranchTest, using event_cycles */
74 static void irq_test(void)
75 {
76         u32 irqs = psxRegs.interrupt;
77         u32 cycle = psxRegs.cycle;
78         u32 irq, irq_bits;
79
80         if ((psxRegs.cycle - psxNextsCounter) >= psxNextCounter)
81                 psxRcntUpdate();
82
83         // irq_funcs() may queue more irqs
84         psxRegs.interrupt = 0;
85
86         for (irq = 0, irq_bits = irqs; irq_bits != 0; irq++, irq_bits >>= 1) {
87                 if (!(irq_bits & 1))
88                         continue;
89                 if ((s32)(cycle - event_cycles[irq]) >= 0) {
90                         irqs &= ~(1 << irq);
91                         irq_funcs[irq]();
92                 }
93         }
94         psxRegs.interrupt |= irqs;
95
96         if ((psxHu32(0x1070) & psxHu32(0x1074)) && (Status & 0x401) == 0x401) {
97                 psxException(0x400, 0);
98                 pending_exception = 1;
99         }
100 }
101
102 void gen_interupt()
103 {
104         evprintf("  +ge %08x, %u->%u\n", psxRegs.pc, psxRegs.cycle, next_interupt);
105
106         irq_test();
107         //psxBranchTest();
108         //pending_exception = 1;
109
110         schedule_timeslice();
111
112         evprintf("  -ge %08x, %u->%u (%d)\n", psxRegs.pc, psxRegs.cycle,
113                 next_interupt, next_interupt - psxRegs.cycle);
114 }
115
116 // from interpreter
117 extern void MTC0(int reg, u32 val);
118
119 void pcsx_mtc0(u32 reg, u32 val)
120 {
121         evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
122         MTC0(reg, val);
123         gen_interupt();
124 }
125
126 void pcsx_mtc0_ds(u32 reg, u32 val)
127 {
128         evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
129         MTC0(reg, val);
130 }
131
/* Savestate hook: nothing to do, psxRegs.intCycle is always kept up to date. */
void new_dyna_save(void)
{
}
136
137 void new_dyna_restore(void)
138 {
139         int i;
140         for (i = 0; i < PSXINT_COUNT; i++)
141                 event_cycles[i] = psxRegs.intCycle[i].sCycle + psxRegs.intCycle[i].cycle;
142
143         new_dyna_pcsx_mem_load_state();
144 }
145
146 /* GTE stuff */
147 void *gte_handlers[64];
148
149 void *gte_handlers_nf[64] = {
150         NULL      , gteRTPS_nf , NULL       , NULL      , NULL     , NULL       , gteNCLIP_nf, NULL      , // 00
151         NULL      , NULL       , NULL       , NULL      , gteOP_nf , NULL       , NULL       , NULL      , // 08
152         gteDPCS_nf, gteINTPL_nf, gteMVMVA_nf, gteNCDS_nf, gteCDP_nf, NULL       , gteNCDT_nf , NULL      , // 10
153         NULL      , NULL       , NULL       , gteNCCS_nf, gteCC_nf , NULL       , gteNCS_nf  , NULL      , // 18
154         gteNCT_nf , NULL       , NULL       , NULL      , NULL     , NULL       , NULL       , NULL      , // 20
155         gteSQR_nf , gteDCPL_nf , gteDPCT_nf , NULL      , NULL     , gteAVSZ3_nf, gteAVSZ4_nf, NULL      , // 28 
156         gteRTPT_nf, NULL       , NULL       , NULL      , NULL     , NULL       , NULL       , NULL      , // 30
157         NULL      , NULL       , NULL       , NULL      , NULL     , gteGPF_nf  , gteGPL_nf  , gteNCCT_nf, // 38
158 };
159
/* Mnemonic for each GTE opcode; NULL where no operation is defined. */
const char *gte_regnames[64] = {
	[0x01] = "RTPS",
	[0x06] = "NCLIP",
	[0x0c] = "OP",
	[0x10] = "DPCS",
	[0x11] = "INTPL",
	[0x12] = "MVMVA",
	[0x13] = "NCDS",
	[0x14] = "CDP",
	[0x16] = "NCDT",
	[0x1b] = "NCCS",
	[0x1c] = "CC",
	[0x1e] = "NCS",
	[0x20] = "NCT",
	[0x28] = "SQR",
	[0x29] = "DCPL",
	[0x2a] = "DPCT",
	[0x2d] = "AVSZ3",
	[0x2e] = "AVSZ4",
	[0x30] = "RTPT",
	[0x3d] = "GPF",
	[0x3e] = "GPL",
	[0x3f] = "NCCT",
};
170
/*
 * Cycle cost per GTE opcode; unlisted opcodes cost 0.
 * Values come from gte.txt; their accuracy is uncertain.
 */
const char gte_cycletab[64] = {
	[0x01] = 15, [0x06] =  8, [0x0c] =  6,
	[0x10] =  8, [0x11] =  8, [0x12] =  8, [0x13] = 19, [0x14] = 13,
	[0x16] = 44, [0x1b] = 17, [0x1c] = 11, [0x1e] = 14,
	[0x20] = 30, [0x28] =  5, [0x29] =  8, [0x2a] = 17,
	[0x2d] =  5, [0x2e] =  6,
	[0x30] = 23, [0x3d] =  5, [0x3e] =  5, [0x3f] = 39,
};
179
/*
 * Bit-mask builders for the 64-bit GTE register usage tables:
 * GDBIT(n) selects bit n (low 32 bits), GCBIT(n) selects bit 32+n
 * (high 32 bits). GDBITSk/GCBITS3 OR several of them together.
 * Arguments are parenthesized and the shifted one is unsigned 64-bit.
 */
#define GCBIT(x) \
	(1ull << (32 + (x)))
#define GDBIT(x) \
	(1ull << (x))
#define GCBITS3(b0,b1,b2) \
	(GCBIT(b0) | GCBIT(b1) | GCBIT(b2))
#define GDBITS2(b0,b1) \
	(GDBIT(b0) | GDBIT(b1))
#define GDBITS3(b0,b1,b2) \
	(GDBITS2(b0,b1) | GDBIT(b2))
#define GDBITS4(b0,b1,b2,b3) \
	(GDBITS3(b0,b1,b2) | GDBIT(b3))
#define GDBITS5(b0,b1,b2,b3,b4) \
	(GDBITS4(b0,b1,b2,b3) | GDBIT(b4))
#define GDBITS6(b0,b1,b2,b3,b4,b5) \
	(GDBITS5(b0,b1,b2,b3,b4) | GDBIT(b5))
#define GDBITS7(b0,b1,b2,b3,b4,b5,b6) \
	(GDBITS6(b0,b1,b2,b3,b4,b5) | GDBIT(b6))
#define GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) \
	(GDBITS7(b0,b1,b2,b3,b4,b5,b6) | GDBIT(b7))
#define GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) \
	(GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) | GDBIT(b8))
#define GDBITS10(b0,b1,b2,b3,b4,b5,b6,b7,b8,b9) \
	(GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) | GDBIT(b9))
204
205 const uint64_t gte_reg_reads[64] = {
206         [GTE_RTPS]  = 0x1f0000ff00000000ll | GDBITS7(0,1,13,14,17,18,19),
207         [GTE_NCLIP] =                        GDBITS3(12,13,14),
208         [GTE_OP]    = GCBITS3(0,2,4)       | GDBITS3(9,10,11),
209         [GTE_DPCS]  = GCBITS3(21,22,23)    | GDBITS4(6,8,21,22),
210         [GTE_INTPL] = GCBITS3(21,22,23)    | GDBITS7(6,8,9,10,11,21,22),
211         [GTE_MVMVA] = 0x00ffffff00000000ll | GDBITS6(0,1,2,3,4,5), // XXX: maybe decode further?
212         [GTE_NCDS]  = 0x00ffff0000000000ll | GDBITS5(0,1,6,21,22),
213         [GTE_CDP]   = 0x00fff00000000000ll | GDBITS7(6,8,9,10,11,21,22),
214         [GTE_NCDT]  = 0x00ffff0000000000ll | GDBITS8(0,1,2,3,4,5,6,8),
215         [GTE_NCCS]  = 0x00ffff0000000000ll | GDBITS6(0,1,6,8,21,22),
216         [GTE_CC]    = 0x001fe00000000000ll | GDBITS6(6,9,10,11,21,22),
217         [GTE_NCS]   = 0x001fff0000000000ll | GDBITS4(0,1,21,22),
218         [GTE_NCT]   = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
219         [GTE_SQR]   =                        GDBITS3(9,10,11),
220         [GTE_DCPL]  = GCBITS3(21,22,23)    | GDBITS7(6,8,9,10,11,21,22),
221         [GTE_DPCT]  = GCBITS3(21,22,23)    | GDBITS4(8,20,21,22),
222         [GTE_AVSZ3] = GCBIT(29)            | GDBITS3(17,18,19),
223         [GTE_AVSZ4] = GCBIT(30)            | GDBITS4(16,17,18,19),
224         [GTE_RTPT]  = 0x1f0000ff00000000ll | GDBITS7(0,1,2,3,4,5,19),
225         [GTE_GPF]   =                        GDBITS7(6,8,9,10,11,21,22),
226         [GTE_GPL]   =                        GDBITS10(6,8,9,10,11,21,22,25,26,27),
227         [GTE_NCCT]  = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
228 };
229
230 // note: this excludes gteFLAG that is always written to
231 const uint64_t gte_reg_writes[64] = {
232         [GTE_RTPS]  = 0x0f0f7f00ll,
233         [GTE_NCLIP] = GDBIT(24),
234         [GTE_OP]    = GDBITS6(9,10,11,25,26,27),
235         [GTE_DPCS]  = GDBITS9(9,10,11,20,21,22,25,26,27),
236         [GTE_INTPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
237         [GTE_MVMVA] = GDBITS6(9,10,11,25,26,27),
238         [GTE_NCDS]  = GDBITS9(9,10,11,20,21,22,25,26,27),
239         [GTE_CDP]   = GDBITS9(9,10,11,20,21,22,25,26,27),
240         [GTE_NCDT]  = GDBITS9(9,10,11,20,21,22,25,26,27),
241         [GTE_NCCS]  = GDBITS9(9,10,11,20,21,22,25,26,27),
242         [GTE_CC]    = GDBITS9(9,10,11,20,21,22,25,26,27),
243         [GTE_NCS]   = GDBITS9(9,10,11,20,21,22,25,26,27),
244         [GTE_NCT]   = GDBITS9(9,10,11,20,21,22,25,26,27),
245         [GTE_SQR]   = GDBITS6(9,10,11,25,26,27),
246         [GTE_DCPL]  = GDBITS9(9,10,11,20,21,22,25,26,27),
247         [GTE_DPCT]  = GDBITS9(9,10,11,20,21,22,25,26,27),
248         [GTE_AVSZ3] = GDBITS2(7,24),
249         [GTE_AVSZ4] = GDBITS2(7,24),
250         [GTE_RTPT]  = 0x0f0f7f00ll,
251         [GTE_GPF]   = GDBITS9(9,10,11,20,21,22,25,26,27),
252         [GTE_GPL]   = GDBITS9(9,10,11,20,21,22,25,26,27),
253         [GTE_NCCT]  = GDBITS9(9,10,11,20,21,22,25,26,27),
254 };
255
256 static int ari64_init()
257 {
258         extern void (*psxCP2[64])();
259         extern void psxNULL();
260         size_t i;
261
262         new_dynarec_init();
263         new_dyna_pcsx_mem_init();
264
265         for (i = 0; i < ARRAY_SIZE(gte_handlers); i++)
266                 if (psxCP2[i] != psxNULL)
267                         gte_handlers[i] = psxCP2[i];
268
269 #if !defined(DRC_DBG)
270 #ifdef __arm__
271         gte_handlers[0x06] = gteNCLIP_arm;
272         gte_handlers_nf[0x01] = gteRTPS_nf_arm;
273         gte_handlers_nf[0x30] = gteRTPT_nf_arm;
274 #endif
275 #ifdef __ARM_NEON__
276         // compiler's _nf version is still a lot slower than neon
277         // _nf_arm RTPS is roughly the same, RTPT slower
278         gte_handlers[0x01] = gte_handlers_nf[0x01] = gteRTPS_neon;
279         gte_handlers[0x30] = gte_handlers_nf[0x30] = gteRTPT_neon;
280 #endif
281 #endif
282 #ifdef DRC_DBG
283         memcpy(gte_handlers_nf, gte_handlers, sizeof(gte_handlers_nf));
284 #endif
285         psxH_ptr = psxH;
286         zeromem_ptr = zero_mem;
287
288         return 0;
289 }
290
291 static void ari64_reset()
292 {
293         printf("ari64_reset\n");
294         new_dyna_pcsx_mem_reset();
295         invalidate_all_pages();
296         new_dyna_restore();
297         pending_exception = 1;
298 }
299
300 // execute until predefined leave points
301 // (HLE softcall exit and BIOS fastboot end)
302 static void ari64_execute_until()
303 {
304         schedule_timeslice();
305
306         evprintf("ari64_execute %08x, %u->%u (%d)\n", psxRegs.pc,
307                 psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle);
308
309         new_dyna_start();
310
311         evprintf("ari64_execute end %08x, %u->%u (%d)\n", psxRegs.pc,
312                 psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle);
313 }
314
315 static void ari64_execute()
316 {
317         while (!stop) {
318                 ari64_execute_until();
319                 evprintf("drc left @%08x\n", psxRegs.pc);
320         }
321 }
322
323 static void ari64_clear(u32 addr, u32 size)
324 {
325         u32 start, end, main_ram;
326
327         size *= 4; /* PCSX uses DMA units */
328
329         evprintf("ari64_clear %08x %04x\n", addr, size);
330
331         /* check for RAM mirrors */
332         main_ram = (addr & 0xffe00000) == 0x80000000;
333
334         start = addr >> 12;
335         end = (addr + size) >> 12;
336
337         for (; start <= end; start++)
338                 if (!main_ram || !invalid_code[start])
339                         invalidate_block(start);
340 }
341
/* CPU-core shutdown hook: tear the dynarec down. */
static void ari64_shutdown()
{
	new_dynarec_cleanup();
}
346
347 extern void intExecute();
348 extern void intExecuteT();
349 extern void intExecuteBlock();
350 extern void intExecuteBlockT();
351 #ifndef DRC_DBG
352 #define intExecuteT intExecute
353 #define intExecuteBlockT intExecuteBlock
354 #endif
355
356 R3000Acpu psxRec = {
357         ari64_init,
358         ari64_reset,
359 #if defined(__arm__)
360         ari64_execute,
361         ari64_execute_until,
362 #else
363         intExecuteT,
364         intExecuteBlockT,
365 #endif
366         ari64_clear,
367         ari64_shutdown
368 };
369
// TODO: rm
#ifndef DRC_DBG
/* No-op stand-ins for the DRC_DBG trace/compare hooks. */
void do_insn_trace() {}
void do_insn_cmp() {}
#endif
375
376 #if defined(__x86_64__) || defined(__i386__)
377 unsigned int address;
378 int pending_exception, stop;
379 unsigned int next_interupt;
380 int new_dynarec_did_compile;
381 int cycle_multiplier;
382 void *psxH_ptr;
383 void *zeromem_ptr;
384 u8 zero_mem[0x1000];
385 void new_dynarec_init() {}
386 void new_dyna_start() {}
387 void new_dynarec_cleanup() {}
388 void new_dynarec_clear_full() {}
389 void invalidate_all_pages() {}
390 void invalidate_block(unsigned int block) {}
391 void new_dyna_pcsx_mem_init(void) {}
392 void new_dyna_pcsx_mem_reset(void) {}
393 void new_dyna_pcsx_mem_load_state(void) {}
394 #endif
395
396 #ifdef DRC_DBG
397
398 #include <stddef.h>
399 static FILE *f;
400 extern u32 last_io_addr;
401
/*
 * Write `size` bytes from `mem` to the file `fname`.
 * If `fname` cannot be opened and contains a '/', retry with just the part
 * after the last '/' (i.e. in the current directory). If no file can be
 * opened, or the write comes up short, report it with perror() and return
 * instead of passing a NULL stream to fwrite()/fclose().
 */
static void dump_mem(const char *fname, void *mem, size_t size)
{
	FILE *f1 = fopen(fname, "wb");

	if (f1 == NULL) {
		/* strrchr() yields NULL when there is no directory part */
		const char *base = strrchr(fname, '/');

		if (base != NULL)
			f1 = fopen(base + 1, "wb");
	}
	if (f1 == NULL) {
		perror(fname);
		return;
	}

	if (fwrite(mem, 1, size, f1) != size)
		perror(fname);
	fclose(f1);
}
410
411 static u32 memcheck_read(u32 a)
412 {
413         if ((a >> 16) == 0x1f80)
414                 // scratchpad/IO
415                 return *(u32 *)(psxH + (a & 0xfffc));
416
417         if ((a >> 16) == 0x1f00)
418                 // parallel
419                 return *(u32 *)(psxP + (a & 0xfffc));
420
421 //      if ((a & ~0xe0600000) < 0x200000)
422         // RAM
423         return *(u32 *)(psxM + (a & 0x1ffffc));
424 }
425
426 void do_insn_trace(void)
427 {
428         static psxRegisters oldregs;
429         static u32 old_io_addr = (u32)-1;
430         static u32 old_io_data = 0xbad0c0de;
431         u32 *allregs_p = (void *)&psxRegs;
432         u32 *allregs_o = (void *)&oldregs;
433         u32 io_data;
434         int i;
435         u8 byte;
436
437 //last_io_addr = 0x5e2c8;
438         if (f == NULL)
439                 f = fopen("tracelog", "wb");
440
441         oldregs.code = psxRegs.code; // don't care
442         for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
443                 if (allregs_p[i] != allregs_o[i]) {
444                         fwrite(&i, 1, 1, f);
445                         fwrite(&allregs_p[i], 1, 4, f);
446                         allregs_o[i] = allregs_p[i];
447                 }
448         }
449         if (old_io_addr != last_io_addr) {
450                 byte = 0xfd;
451                 fwrite(&byte, 1, 1, f);
452                 fwrite(&last_io_addr, 1, 4, f);
453                 old_io_addr = last_io_addr;
454         }
455         io_data = memcheck_read(last_io_addr);
456         if (old_io_data != io_data) {
457                 byte = 0xfe;
458                 fwrite(&byte, 1, 1, f);
459                 fwrite(&io_data, 1, 4, f);
460                 old_io_data = io_data;
461         }
462         byte = 0xff;
463         fwrite(&byte, 1, 1, f);
464
465 #if 0
466         if (psxRegs.cycle == 190230) {
467                 dump_mem("/mnt/ntz/dev/pnd/tmp/psxram_i.dump", psxM, 0x200000);
468                 dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs_i.dump", psxH, 0x10000);
469                 printf("dumped\n");
470                 exit(1);
471         }
472 #endif
473 }
474
475 static const char *regnames[offsetof(psxRegisters, intCycle) / 4] = {
476         "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
477         "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
478         "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
479         "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
480         "lo",  "hi",
481         "C0_0",  "C0_1",  "C0_2",  "C0_3",  "C0_4",  "C0_5",  "C0_6",  "C0_7",
482         "C0_8",  "C0_9",  "C0_10", "C0_11", "C0_12", "C0_13", "C0_14", "C0_15",
483         "C0_16", "C0_17", "C0_18", "C0_19", "C0_20", "C0_21", "C0_22", "C0_23",
484         "C0_24", "C0_25", "C0_26", "C0_27", "C0_28", "C0_29", "C0_30", "C0_31",
485
486         "C2D0",  "C2D1",  "C2D2",  "C2D3",  "C2D4",  "C2D5",  "C2D6",  "C2D7",
487         "C2D8",  "C2D9",  "C2D10", "C2D11", "C2D12", "C2D13", "C2D14", "C2D15",
488         "C2D16", "C2D17", "C2D18", "C2D19", "C2D20", "C2D21", "C2D22", "C2D23",
489         "C2D24", "C2D25", "C2D26", "C2D27", "C2D28", "C2D29", "C2D30", "C2D31",
490
491         "C2C0",  "C2C1",  "C2C2",  "C2C3",  "C2C4",  "C2C5",  "C2C6",  "C2C7",
492         "C2C8",  "C2C9",  "C2C10", "C2C11", "C2C12", "C2C13", "C2C14", "C2C15",
493         "C2C16", "C2C17", "C2C18", "C2C19", "C2C20", "C2C21", "C2C22", "C2C23",
494         "C2C24", "C2C25", "C2C26", "C2C27", "C2C28", "C2C29", "C2C30", "C2C31",
495
496         "PC", "code", "cycle", "interrupt",
497 };
498
499 static struct {
500         int reg;
501         u32 val, val_expect;
502         u32 pc, cycle;
503 } miss_log[64];
504 static int miss_log_i;
505 #define miss_log_len (sizeof(miss_log)/sizeof(miss_log[0]))
506 #define miss_log_mask (miss_log_len-1)
507
508 static void miss_log_add(int reg, u32 val, u32 val_expect, u32 pc, u32 cycle)
509 {
510         miss_log[miss_log_i].reg = reg;
511         miss_log[miss_log_i].val = val;
512         miss_log[miss_log_i].val_expect = val_expect;
513         miss_log[miss_log_i].pc = pc;
514         miss_log[miss_log_i].cycle = cycle;
515         miss_log_i = (miss_log_i + 1) & miss_log_mask;
516 }
517
/* Empty function; its only visible reference is a commented-out call in do_insn_cmp(). */
void breakme() {}
519
520 void do_insn_cmp(void)
521 {
522         static psxRegisters rregs;
523         static u32 mem_addr, mem_val;
524         u32 *allregs_p = (void *)&psxRegs;
525         u32 *allregs_e = (void *)&rregs;
526         static u32 ppc, failcount;
527         int i, ret, bad = 0;
528         u8 code;
529
530         if (f == NULL)
531                 f = fopen("tracelog", "rb");
532
533         while (1) {
534                 if ((ret = fread(&code, 1, 1, f)) <= 0)
535                         break;
536                 if (ret <= 0)
537                         break;
538                 if (code == 0xff)
539                         break;
540                 if (code == 0xfd) {
541                         if ((ret = fread(&mem_addr, 1, 4, f)) <= 0)
542                                 break;
543                         continue;
544                 }
545                 if (code == 0xfe) {
546                         if ((ret = fread(&mem_val, 1, 4, f)) <= 0)
547                                 break;
548                         continue;
549                 }
550                 if ((ret = fread(&allregs_e[code], 1, 4, f)) <= 0)
551                         break;
552         }
553
554         if (ret <= 0) {
555                 printf("EOF?\n");
556                 goto end;
557         }
558
559         psxRegs.code = rregs.code; // don't care
560         psxRegs.cycle = rregs.cycle;
561         psxRegs.CP0.r[9] = rregs.CP0.r[9]; // Count
562
563 //if (psxRegs.cycle == 166172) breakme();
564
565         if (memcmp(&psxRegs, &rregs, offsetof(psxRegisters, intCycle)) == 0 &&
566                         mem_val == memcheck_read(mem_addr)
567            ) {
568                 failcount = 0;
569                 goto ok;
570         }
571
572         for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
573                 if (allregs_p[i] != allregs_e[i]) {
574                         miss_log_add(i, allregs_p[i], allregs_e[i], psxRegs.pc, psxRegs.cycle);
575                         bad++;
576                 }
577         }
578
579         if (mem_val != memcheck_read(mem_addr)) {
580                 printf("bad mem @%08x: %08x %08x\n", mem_addr, memcheck_read(mem_addr), mem_val);
581                 goto end;
582         }
583
584         if (psxRegs.pc == rregs.pc && bad < 6 && failcount < 32) {
585                 static int last_mcycle;
586                 if (last_mcycle != psxRegs.cycle >> 20) {
587                         printf("%u\n", psxRegs.cycle);
588                         last_mcycle = psxRegs.cycle >> 20;
589                 }
590                 failcount++;
591                 goto ok;
592         }
593
594 end:
595         for (i = 0; i < miss_log_len; i++, miss_log_i = (miss_log_i + 1) & miss_log_mask)
596                 printf("bad %5s: %08x %08x, pc=%08x, cycle %u\n",
597                         regnames[miss_log[miss_log_i].reg], miss_log[miss_log_i].val,
598                         miss_log[miss_log_i].val_expect, miss_log[miss_log_i].pc, miss_log[miss_log_i].cycle);
599         printf("-- %d\n", bad);
600         for (i = 0; i < 8; i++)
601                 printf("r%d=%08x r%2d=%08x r%2d=%08x r%2d=%08x\n", i, allregs_p[i],
602                         i+8, allregs_p[i+8], i+16, allregs_p[i+16], i+24, allregs_p[i+24]);
603         printf("PC: %08x/%08x, cycle %u\n", psxRegs.pc, ppc, psxRegs.cycle);
604         dump_mem("/mnt/ntz/dev/pnd/tmp/psxram.dump", psxM, 0x200000);
605         dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs.dump", psxH, 0x10000);
606         exit(1);
607 ok:
608         psxRegs.cycle = rregs.cycle + 2; // sync timing
609         ppc = psxRegs.pc;
610 }
611
612 #endif