2 * (C) Gražvydas "notaz" Ignotas, 2010-2011
4 * This work is licensed under the terms of GNU GPL version 2 or later.
5 * See the COPYING file in the top-level directory.
12 #include "../psxhle.h"
13 #include "../r3000a.h"
15 #include "../psxdma.h"
17 #include "../gte_arm.h"
18 #include "../gte_neon.h"
/*
 * Number of elements in a true array (not a pointer).
 * The argument is parenthesized before indexing so that expression
 * arguments are indexed as a whole.
 */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
24 //#define evprintf printf
27 char invalid_code[0x100000];
28 u32 event_cycles[PSXINT_COUNT];
30 static void schedule_timeslice(void)
32 u32 i, c = psxRegs.cycle;
33 u32 irqs = psxRegs.interrupt;
37 for (i = 0; irqs != 0; i++, irqs >>= 1) {
40 dif = event_cycles[i] - c;
41 //evprintf(" ev %d\n", dif);
42 if (0 < dif && dif < min)
45 next_interupt = c + min;
48 typedef void (irq_func)();
50 static irq_func * const irq_funcs[] = {
51 [PSXINT_SIO] = sioInterrupt,
52 [PSXINT_CDR] = cdrInterrupt,
53 [PSXINT_CDREAD] = cdrReadInterrupt,
54 [PSXINT_GPUDMA] = gpuInterrupt,
55 [PSXINT_MDECOUTDMA] = mdec1Interrupt,
56 [PSXINT_SPUDMA] = spuInterrupt,
57 [PSXINT_MDECINDMA] = mdec0Interrupt,
58 [PSXINT_GPUOTCDMA] = gpuotcInterrupt,
59 [PSXINT_CDRDMA] = cdrDmaInterrupt,
60 [PSXINT_CDRLID] = cdrLidSeekInterrupt,
61 [PSXINT_CDRPLAY] = cdrPlayInterrupt,
62 [PSXINT_RCNT] = psxRcntUpdate,
65 /* local dupe of psxBranchTest, using event_cycles */
66 static void irq_test(void)
68 u32 irqs = psxRegs.interrupt;
69 u32 cycle = psxRegs.cycle;
72 // irq_funcs() may queue more irqs
73 psxRegs.interrupt = 0;
75 for (irq = 0, irq_bits = irqs; irq_bits != 0; irq++, irq_bits >>= 1) {
78 if ((s32)(cycle - event_cycles[irq]) >= 0) {
83 psxRegs.interrupt |= irqs;
85 if ((psxHu32(0x1070) & psxHu32(0x1074)) && (Status & 0x401) == 0x401) {
86 psxException(0x400, 0);
87 pending_exception = 1;
93 evprintf(" +ge %08x, %u->%u\n", psxRegs.pc, psxRegs.cycle, next_interupt);
97 //pending_exception = 1;
101 evprintf(" -ge %08x, %u->%u (%d)\n", psxRegs.pc, psxRegs.cycle,
102 next_interupt, next_interupt - psxRegs.cycle);
106 extern void MTC0(int reg, u32 val);
108 void pcsx_mtc0(u32 reg, u32 val)
110 evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
113 if (Cause & Status & 0x0300) // possible sw irq
114 pending_exception = 1;
117 void pcsx_mtc0_ds(u32 reg, u32 val)
119 evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
123 void new_dyna_save(void)
125 psxRegs.interrupt &= ~(1 << PSXINT_RCNT); // old savestate compat
127 // psxRegs.intCycle is always maintained, no need to convert
130 void new_dyna_after_save(void)
132 psxRegs.interrupt |= 1 << PSXINT_RCNT;
135 void new_dyna_restore(void)
138 for (i = 0; i < PSXINT_COUNT; i++)
139 event_cycles[i] = psxRegs.intCycle[i].sCycle + psxRegs.intCycle[i].cycle;
141 event_cycles[PSXINT_RCNT] = psxNextsCounter + psxNextCounter;
142 psxRegs.interrupt |= 1 << PSXINT_RCNT;
143 psxRegs.interrupt &= (1 << PSXINT_COUNT) - 1;
145 new_dyna_pcsx_mem_load_state();
149 void *gte_handlers[64];
151 void *gte_handlers_nf[64] = {
152 NULL , gteRTPS_nf , NULL , NULL , NULL , NULL , gteNCLIP_nf, NULL , // 00
153 NULL , NULL , NULL , NULL , gteOP_nf , NULL , NULL , NULL , // 08
154 gteDPCS_nf, gteINTPL_nf, gteMVMVA_nf, gteNCDS_nf, gteCDP_nf, NULL , gteNCDT_nf , NULL , // 10
155 NULL , NULL , NULL , gteNCCS_nf, gteCC_nf , NULL , gteNCS_nf , NULL , // 18
156 gteNCT_nf , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20
157 gteSQR_nf , gteDCPL_nf , gteDPCT_nf , NULL , NULL , gteAVSZ3_nf, gteAVSZ4_nf, NULL , // 28
158 gteRTPT_nf, NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30
159 NULL , NULL , NULL , NULL , NULL , gteGPF_nf , gteGPL_nf , gteNCCT_nf, // 38
162 const char *gte_regnames[64] = {
163 NULL , "RTPS" , NULL , NULL , NULL , NULL , "NCLIP", NULL , // 00
164 NULL , NULL , NULL , NULL , "OP" , NULL , NULL , NULL , // 08
165 "DPCS", "INTPL", "MVMVA", "NCDS", "CDP", NULL , "NCDT" , NULL , // 10
166 NULL , NULL , NULL , "NCCS", "CC" , NULL , "NCS" , NULL , // 18
167 "NCT" , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20
168 "SQR" , "DCPL" , "DPCT" , NULL , NULL , "AVSZ3", "AVSZ4", NULL , // 28
169 "RTPT", NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30
170 NULL , NULL , NULL , NULL , NULL , "GPF" , "GPL" , "NCCT", // 38
173 /* from gte.txt.. not sure if this is any good. */
174 const char gte_cycletab[64] = {
175 /* 1 2 3 4 5 6 7 8 9 a b c d e f */
176 0, 15, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 6, 0, 0, 0,
177 8, 8, 8, 19, 13, 0, 44, 0, 0, 0, 0, 17, 11, 0, 14, 0,
178 30, 0, 0, 0, 0, 0, 0, 0, 5, 8, 17, 0, 0, 5, 6, 0,
179 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 39,
/* OR of the GCBIT() masks of three register indices. */
#define GCBITS3(b0,b1,b2) \
	(GCBIT(b0) | GCBIT(b1) | GCBIT(b2))

/*
 * GDBITSn(b0, ..., bn-1): OR of the GDBIT() masks of n register indices.
 * Larger variants are assembled from two smaller ones.
 */
#define GDBITS2(b0,b1) \
	(GDBIT(b0) | GDBIT(b1))
#define GDBITS3(b0,b1,b2) \
	(GDBIT(b0) | GDBIT(b1) | GDBIT(b2))
#define GDBITS4(b0,b1,b2,b3) \
	(GDBITS2(b0,b1) | GDBITS2(b2,b3))
#define GDBITS5(b0,b1,b2,b3,b4) \
	(GDBITS3(b0,b1,b2) | GDBITS2(b3,b4))
#define GDBITS6(b0,b1,b2,b3,b4,b5) \
	(GDBITS3(b0,b1,b2) | GDBITS3(b3,b4,b5))
#define GDBITS7(b0,b1,b2,b3,b4,b5,b6) \
	(GDBITS4(b0,b1,b2,b3) | GDBITS3(b4,b5,b6))
#define GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) \
	(GDBITS4(b0,b1,b2,b3) | GDBITS4(b4,b5,b6,b7))
#define GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) \
	(GDBITS5(b0,b1,b2,b3,b4) | GDBITS4(b5,b6,b7,b8))
#define GDBITS10(b0,b1,b2,b3,b4,b5,b6,b7,b8,b9) \
	(GDBITS5(b0,b1,b2,b3,b4) | GDBITS5(b5,b6,b7,b8,b9))
207 const uint64_t gte_reg_reads[64] = {
208 [GTE_RTPS] = 0x1f0000ff00000000ll | GDBITS7(0,1,13,14,17,18,19),
209 [GTE_NCLIP] = GDBITS3(12,13,14),
210 [GTE_OP] = GCBITS3(0,2,4) | GDBITS3(9,10,11),
211 [GTE_DPCS] = GCBITS3(21,22,23) | GDBITS4(6,8,21,22),
212 [GTE_INTPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22),
213 [GTE_MVMVA] = 0x00ffffff00000000ll | GDBITS9(0,1,2,3,4,5,9,10,11), // XXX: maybe decode further?
214 [GTE_NCDS] = 0x00ffff0000000000ll | GDBITS6(0,1,6,8,21,22),
215 [GTE_CDP] = 0x00ffe00000000000ll | GDBITS7(6,8,9,10,11,21,22),
216 [GTE_NCDT] = 0x00ffff0000000000ll | GDBITS8(0,1,2,3,4,5,6,8),
217 [GTE_NCCS] = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22),
218 [GTE_CC] = 0x001fe00000000000ll | GDBITS6(6,9,10,11,21,22),
219 [GTE_NCS] = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22),
220 [GTE_NCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
221 [GTE_SQR] = GDBITS3(9,10,11),
222 [GTE_DCPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22),
223 [GTE_DPCT] = GCBITS3(21,22,23) | GDBITS4(8,20,21,22),
224 [GTE_AVSZ3] = GCBIT(29) | GDBITS3(17,18,19),
225 [GTE_AVSZ4] = GCBIT(30) | GDBITS4(16,17,18,19),
226 [GTE_RTPT] = 0x1f0000ff00000000ll | GDBITS7(0,1,2,3,4,5,19),
227 [GTE_GPF] = GDBITS7(6,8,9,10,11,21,22),
228 [GTE_GPL] = GDBITS10(6,8,9,10,11,21,22,25,26,27),
229 [GTE_NCCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
232 // note: this excludes gteFLAG that is always written to
233 const uint64_t gte_reg_writes[64] = {
234 [GTE_RTPS] = 0x0f0f7f00ll,
235 [GTE_NCLIP] = GDBIT(24),
236 [GTE_OP] = GDBITS6(9,10,11,25,26,27),
237 [GTE_DPCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
238 [GTE_INTPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
239 [GTE_MVMVA] = GDBITS6(9,10,11,25,26,27),
240 [GTE_NCDS] = GDBITS9(9,10,11,20,21,22,25,26,27),
241 [GTE_CDP] = GDBITS9(9,10,11,20,21,22,25,26,27),
242 [GTE_NCDT] = GDBITS9(9,10,11,20,21,22,25,26,27),
243 [GTE_NCCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
244 [GTE_CC] = GDBITS9(9,10,11,20,21,22,25,26,27),
245 [GTE_NCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
246 [GTE_NCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
247 [GTE_SQR] = GDBITS6(9,10,11,25,26,27),
248 [GTE_DCPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
249 [GTE_DPCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
250 [GTE_AVSZ3] = GDBITS2(7,24),
251 [GTE_AVSZ4] = GDBITS2(7,24),
252 [GTE_RTPT] = 0x0f0f7f00ll,
253 [GTE_GPF] = GDBITS9(9,10,11,20,21,22,25,26,27),
254 [GTE_GPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
255 [GTE_NCCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
258 static int ari64_init()
260 extern void (*psxCP2[64])();
261 extern void psxNULL();
265 new_dyna_pcsx_mem_init();
267 for (i = 0; i < ARRAY_SIZE(gte_handlers); i++)
268 if (psxCP2[i] != psxNULL)
269 gte_handlers[i] = psxCP2[i];
271 #if !defined(DRC_DBG)
273 gte_handlers[0x06] = gteNCLIP_arm;
274 gte_handlers_nf[0x01] = gteRTPS_nf_arm;
275 gte_handlers_nf[0x30] = gteRTPT_nf_arm;
278 // compiler's _nf version is still a lot slower than neon
279 // _nf_arm RTPS is roughly the same, RTPT slower
280 gte_handlers[0x01] = gte_handlers_nf[0x01] = gteRTPS_neon;
281 gte_handlers[0x30] = gte_handlers_nf[0x30] = gteRTPT_neon;
285 memcpy(gte_handlers_nf, gte_handlers, sizeof(gte_handlers_nf));
288 zeromem_ptr = zero_mem;
293 static void ari64_reset()
295 printf("ari64_reset\n");
296 new_dyna_pcsx_mem_reset();
297 invalidate_all_pages();
299 pending_exception = 1;
303 // execute until predefined leave points
304 // (HLE softcall exit and BIOS fastboot end)
305 static void ari64_execute_until()
307 schedule_timeslice();
309 evprintf("ari64_execute %08x, %u->%u (%d)\n", psxRegs.pc,
310 psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle);
314 evprintf("ari64_execute end %08x, %u->%u (%d)\n", psxRegs.pc,
315 psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle);
318 static void ari64_execute()
321 ari64_execute_until();
322 evprintf("drc left @%08x\n", psxRegs.pc);
327 static void ari64_clear(u32 addr, u32 size)
329 u32 start, end, main_ram;
331 size *= 4; /* PCSX uses DMA units */
333 evprintf("ari64_clear %08x %04x\n", addr, size);
335 /* check for RAM mirrors */
336 main_ram = (addr & 0xffe00000) == 0x80000000;
339 end = (addr + size) >> 12;
341 for (; start <= end; start++)
342 if (!main_ram || !invalid_code[start])
343 invalidate_block(start);
346 static void ari64_shutdown()
348 new_dynarec_cleanup();
351 extern void intExecute();
352 extern void intExecuteT();
353 extern void intExecuteBlock();
354 extern void intExecuteBlockT();
356 #define intExecuteT intExecute
357 #define intExecuteBlockT intExecuteBlock
/*
 * No-op versions of the instruction trace/compare hooks. Declared with
 * (void) so the signatures match the full implementations of the same
 * functions elsewhere in this file.
 */
void do_insn_trace(void) {}
void do_insn_cmp(void) {}
380 #if defined(__x86_64__) || defined(__i386__)
381 unsigned int address;
382 int pending_exception, stop;
383 unsigned int next_interupt;
384 int new_dynarec_did_compile;
385 int cycle_multiplier;
386 int new_dynarec_hacks;
/*
 * Empty stand-ins for the dynarec entry points; every function is a no-op.
 * All of them use explicit (void) parameter lists for consistency, and the
 * unused block argument is marked as such.
 */
void new_dynarec_init(void) {}
void new_dyna_start(void) {}
void new_dynarec_cleanup(void) {}
void new_dynarec_clear_full(void) {}
void invalidate_all_pages(void) {}
void invalidate_block(unsigned int block) { (void)block; }
void new_dyna_pcsx_mem_init(void) {}
void new_dyna_pcsx_mem_reset(void) {}
void new_dyna_pcsx_mem_load_state(void) {}
405 extern u32 last_io_addr;
/*
 * Debug helper: writes `size` bytes starting at `mem` to the file `fname`.
 *
 * If `fname` cannot be opened and it contains a '/', the part after the last
 * '/' is tried instead (a file of the same name relative to the current
 * directory). When no file can be opened, or the write comes up short, a
 * message is printed to stderr and the function returns without crashing.
 */
static void dump_mem(const char *fname, void *mem, size_t size)
{
	FILE *f1 = fopen(fname, "wb");

	if (f1 == NULL) {
		// strrchr() yields NULL when there is no directory part
		const char *base = strrchr(fname, '/');

		if (base != NULL)
			f1 = fopen(base + 1, "wb");
	}
	if (f1 == NULL) {
		fprintf(stderr, "dump_mem: can't open %s\n", fname);
		return;
	}

	if (fwrite(mem, 1, size, f1) != size)
		fprintf(stderr, "dump_mem: short write to %s\n", fname);

	// fclose() flushes; report a failure since this is a write stream
	if (fclose(f1) != 0)
		fprintf(stderr, "dump_mem: error closing %s\n", fname);
}
416 static u32 memcheck_read(u32 a)
418 if ((a >> 16) == 0x1f80)
420 return *(u32 *)(psxH + (a & 0xfffc));
422 if ((a >> 16) == 0x1f00)
424 return *(u32 *)(psxP + (a & 0xfffc));
426 // if ((a & ~0xe0600000) < 0x200000)
428 return *(u32 *)(psxM + (a & 0x1ffffc));
431 void do_insn_trace(void)
433 static psxRegisters oldregs;
434 static u32 old_io_addr = (u32)-1;
435 static u32 old_io_data = 0xbad0c0de;
436 static u32 event_cycles_o[PSXINT_COUNT];
437 u32 *allregs_p = (void *)&psxRegs;
438 u32 *allregs_o = (void *)&oldregs;
443 //last_io_addr = 0x5e2c8;
445 f = fopen("tracelog", "wb");
448 oldregs.code = psxRegs.code; // don't care
449 for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
450 if (allregs_p[i] != allregs_o[i]) {
452 fwrite(&allregs_p[i], 1, 4, f);
453 allregs_o[i] = allregs_p[i];
457 for (i = 0; i < PSXINT_COUNT; i++) {
458 if (event_cycles[i] != event_cycles_o[i]) {
460 fwrite(&byte, 1, 1, f);
462 fwrite(&event_cycles[i], 1, 4, f);
463 event_cycles_o[i] = event_cycles[i];
467 if (old_io_addr != last_io_addr) {
469 fwrite(&byte, 1, 1, f);
470 fwrite(&last_io_addr, 1, 4, f);
471 old_io_addr = last_io_addr;
473 io_data = memcheck_read(last_io_addr);
474 if (old_io_data != io_data) {
476 fwrite(&byte, 1, 1, f);
477 fwrite(&io_data, 1, 4, f);
478 old_io_data = io_data;
481 fwrite(&byte, 1, 1, f);
484 if (psxRegs.cycle == 190230) {
485 dump_mem("/mnt/ntz/dev/pnd/tmp/psxram_i.dump", psxM, 0x200000);
486 dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs_i.dump", psxH, 0x10000);
493 static const char *regnames[offsetof(psxRegisters, intCycle) / 4] = {
494 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
495 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
496 "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
497 "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
499 "C0_0", "C0_1", "C0_2", "C0_3", "C0_4", "C0_5", "C0_6", "C0_7",
500 "C0_8", "C0_9", "C0_10", "C0_11", "C0_12", "C0_13", "C0_14", "C0_15",
501 "C0_16", "C0_17", "C0_18", "C0_19", "C0_20", "C0_21", "C0_22", "C0_23",
502 "C0_24", "C0_25", "C0_26", "C0_27", "C0_28", "C0_29", "C0_30", "C0_31",
504 "C2D0", "C2D1", "C2D2", "C2D3", "C2D4", "C2D5", "C2D6", "C2D7",
505 "C2D8", "C2D9", "C2D10", "C2D11", "C2D12", "C2D13", "C2D14", "C2D15",
506 "C2D16", "C2D17", "C2D18", "C2D19", "C2D20", "C2D21", "C2D22", "C2D23",
507 "C2D24", "C2D25", "C2D26", "C2D27", "C2D28", "C2D29", "C2D30", "C2D31",
509 "C2C0", "C2C1", "C2C2", "C2C3", "C2C4", "C2C5", "C2C6", "C2C7",
510 "C2C8", "C2C9", "C2C10", "C2C11", "C2C12", "C2C13", "C2C14", "C2C15",
511 "C2C16", "C2C17", "C2C18", "C2C19", "C2C20", "C2C21", "C2C22", "C2C23",
512 "C2C24", "C2C25", "C2C26", "C2C27", "C2C28", "C2C29", "C2C30", "C2C31",
514 "PC", "code", "cycle", "interrupt",
522 static int miss_log_i;
523 #define miss_log_len (sizeof(miss_log)/sizeof(miss_log[0]))
524 #define miss_log_mask (miss_log_len-1)
526 static void miss_log_add(int reg, u32 val, u32 val_expect, u32 pc, u32 cycle)
528 miss_log[miss_log_i].reg = reg;
529 miss_log[miss_log_i].val = val;
530 miss_log[miss_log_i].val_expect = val_expect;
531 miss_log[miss_log_i].pc = pc;
532 miss_log[miss_log_i].cycle = cycle;
533 miss_log_i = (miss_log_i + 1) & miss_log_mask;
538 void do_insn_cmp(void)
540 static psxRegisters rregs;
541 static u32 mem_addr, mem_val;
542 u32 *allregs_p = (void *)&psxRegs;
543 u32 *allregs_e = (void *)&rregs;
544 static u32 ppc, failcount;
545 int i, ret, bad = 0, which_event = -1;
550 f = fopen("tracelog", "rb");
553 if ((ret = fread(&code, 1, 1, f)) <= 0)
562 fread(&which_event, 1, 1, f);
563 fread(&ev_cycles, 1, 4, f);
566 fread(&mem_addr, 1, 4, f);
569 fread(&mem_val, 1, 4, f);
572 fread(&allregs_e[code], 1, 4, f);
580 psxRegs.code = rregs.code; // don't care
581 psxRegs.cycle = rregs.cycle;
582 psxRegs.CP0.r[9] = rregs.CP0.r[9]; // Count
584 //if (psxRegs.cycle == 166172) breakme();
586 if (memcmp(&psxRegs, &rregs, offsetof(psxRegisters, intCycle)) == 0 &&
587 mem_val == memcheck_read(mem_addr)
593 for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
594 if (allregs_p[i] != allregs_e[i]) {
595 miss_log_add(i, allregs_p[i], allregs_e[i], psxRegs.pc, psxRegs.cycle);
600 if (mem_val != memcheck_read(mem_addr)) {
601 printf("bad mem @%08x: %08x %08x\n", mem_addr, memcheck_read(mem_addr), mem_val);
605 if (which_event >= 0 && event_cycles[which_event] != ev_cycles) {
606 printf("bad ev_cycles #%d: %08x %08x\n", which_event, event_cycles[which_event], ev_cycles);
610 if (psxRegs.pc == rregs.pc && bad < 6 && failcount < 32) {
611 static int last_mcycle;
612 if (last_mcycle != psxRegs.cycle >> 20) {
613 printf("%u\n", psxRegs.cycle);
614 last_mcycle = psxRegs.cycle >> 20;
621 for (i = 0; i < miss_log_len; i++, miss_log_i = (miss_log_i + 1) & miss_log_mask)
622 printf("bad %5s: %08x %08x, pc=%08x, cycle %u\n",
623 regnames[miss_log[miss_log_i].reg], miss_log[miss_log_i].val,
624 miss_log[miss_log_i].val_expect, miss_log[miss_log_i].pc, miss_log[miss_log_i].cycle);
625 printf("-- %d\n", bad);
626 for (i = 0; i < 8; i++)
627 printf("r%d=%08x r%2d=%08x r%2d=%08x r%2d=%08x\n", i, allregs_p[i],
628 i+8, allregs_p[i+8], i+16, allregs_p[i+16], i+24, allregs_p[i+24]);
629 printf("PC: %08x/%08x, cycle %u\n", psxRegs.pc, ppc, psxRegs.cycle);
630 dump_mem("/mnt/ntz/dev/pnd/tmp/psxram.dump", psxM, 0x200000);
631 dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs.dump", psxH, 0x10000);
634 psxRegs.cycle = rregs.cycle + 2; // sync timing