2 * (C) Gražvydas "notaz" Ignotas, 2010-2011
4 * This work is licensed under the terms of GNU GPL version 2 or later.
5 * See the COPYING file in the top-level directory.
12 #include "../psxhle.h"
13 #include "../r3000a.h"
15 #include "../psxdma.h"
17 #include "../gte_arm.h"
18 #include "../gte_neon.h"
/*
 * Number of elements in the array `x`.
 * Only valid for true arrays (not pointers); the argument is parenthesized
 * so that any array-valued expression can be passed safely.
 */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
24 //#define evprintf printf
/*
 * Per-page flag table.  In this file it is indexed by (address >> 12) in
 * ari64_clear(), which skips pages whose entry is already non-zero for
 * main-RAM addresses.
 * NOTE(review): who sets/clears the flags is not visible here - presumably
 * the dynarec core; confirm.
 */
27 char invalid_code[0x100000];
/*
 * Absolute cycle value at which each PSXINT_* event is due; compared
 * against psxRegs.cycle by schedule_timeslice() and irq_test().
 */
28 u32 event_cycles[PSXINT_COUNT];
/*
 * Compute next_interupt, the cycle count at which execution should stop
 * to service events.
 *
 * Walks the bits of psxRegs.interrupt, and for each event index computes
 * its distance from the current cycle (event_cycles[i] - psxRegs.cycle).
 * The result stored is current cycle + `min`.
 *
 * NOTE(review): several short lines of this function are not visible in
 * this excerpt (declaration and initial value of `min`/`dif`, the test of
 * the individual interrupt bit, the update of `min`); presumably `min`
 * ends up as the smallest positive distance - confirm against the full file.
 */
30 static void schedule_timeslice(void)
32 u32 i, c = psxRegs.cycle;
33 u32 irqs = psxRegs.interrupt;
37 for (i = 0; irqs != 0; i++, irqs >>= 1) {
40 dif = event_cycles[i] - c;
41 //evprintf(" ev %d\n", dif);
/* only events strictly in the future can shorten the timeslice */
42 if (0 < dif && dif < min)
45 next_interupt = c + min;
/* Signature shared by all event handlers (unprototyped, returns nothing). */
48 typedef void (irq_func)();
/*
 * Event handler table, indexed by PSXINT_* event number via designated
 * initializers; indices without an initializer are NULL.
 */
50 static irq_func * const irq_funcs[] = {
51 [PSXINT_SIO] = sioInterrupt,
52 [PSXINT_CDR] = cdrInterrupt,
53 [PSXINT_CDREAD] = cdrReadInterrupt,
54 [PSXINT_GPUDMA] = gpuInterrupt,
55 [PSXINT_MDECOUTDMA] = mdec1Interrupt,
56 [PSXINT_SPUDMA] = spuInterrupt,
57 [PSXINT_MDECINDMA] = mdec0Interrupt,
58 [PSXINT_GPUOTCDMA] = gpuotcInterrupt,
59 [PSXINT_CDRDMA] = cdrDmaInterrupt,
60 [PSXINT_CDRLID] = cdrLidSeekInterrupt,
61 [PSXINT_CDRPLAY] = cdrPlayInterrupt,
62 [PSXINT_RCNT] = psxRcntUpdate,
/*
 * Service pending events and raise a CPU interrupt exception if needed.
 *
 * Takes a snapshot of psxRegs.interrupt, clears the register (handlers may
 * queue new events while running), and checks every pending event whose
 * due cycle in event_cycles[] has been reached, using a signed difference
 * so that cycle counter wrap-around is tolerated.  The snapshot is then
 * OR-ed back into psxRegs.interrupt.  Finally, if an enabled hardware
 * interrupt is flagged (I/O words at 0x1070 & 0x1074) and
 * (Status & 0x401) == 0x401, psxException(0x400, 0) is raised and
 * pending_exception is set.
 *
 * NOTE(review): the lines that skip clear bits, drop serviced events from
 * the snapshot and call irq_funcs[] are not visible in this excerpt.
 */
65 /* local dupe of psxBranchTest, using event_cycles */
66 static void irq_test(void)
68 u32 irqs = psxRegs.interrupt;
69 u32 cycle = psxRegs.cycle;
72 // irq_funcs() may queue more irqs
73 psxRegs.interrupt = 0;
75 for (irq = 0, irq_bits = irqs; irq_bits != 0; irq++, irq_bits >>= 1) {
/* signed compare: true once `cycle` has reached or passed the due cycle */
78 if ((s32)(cycle - event_cycles[irq]) >= 0) {
83 psxRegs.interrupt |= irqs;
85 if ((psxHu32(0x1070) & psxHu32(0x1074)) && (Status & 0x401) == 0x401) {
86 psxException(0x400, 0);
87 pending_exception = 1;
93 evprintf(" +ge %08x, %u->%u\n", psxRegs.pc, psxRegs.cycle, next_interupt);
97 //pending_exception = 1;
101 evprintf(" -ge %08x, %u->%u (%d)\n", psxRegs.pc, psxRegs.cycle,
102 next_interupt, next_interupt - psxRegs.cycle);
/* CP0 register write implemented outside this file. */
106 extern void MTC0(int reg, u32 val);
/*
 * Hook for a write of `val` to CP0 register `reg`.
 * Logs the access (when evprintf is enabled) and sets pending_exception
 * when Cause and Status share a bit in mask 0x0300, i.e. a software
 * interrupt may now be deliverable.
 * NOTE(review): the actual MTC0() call is presumably on a line not visible
 * in this excerpt.
 */
108 void pcsx_mtc0(u32 reg, u32 val)
110 evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
113 if (Cause & Status & 0x0300) // possible sw irq
114 pending_exception = 1;
/*
 * Variant of pcsx_mtc0(); only the trace print is visible in this excerpt.
 * NOTE(review): the "_ds" suffix presumably means "delay slot" and the
 * body presumably forwards to MTC0() without the pending_exception check -
 * confirm against the full file.
 */
117 void pcsx_mtc0_ds(u32 reg, u32 val)
119 evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
/*
 * Called before a savestate is written: clears the PSXINT_RCNT bit in
 * psxRegs.interrupt so the saved state stays compatible with older
 * savestates.
 */
123 void new_dyna_save(void)
125 psxRegs.interrupt &= ~(1 << PSXINT_RCNT); // old savestate compat
127 // psxRegs.intCycle is always maintained, no need to convert
/*
 * Called after a savestate has been written: sets the PSXINT_RCNT bit in
 * psxRegs.interrupt again (new_dyna_save() cleared it).
 */
130 void new_dyna_after_save(void)
132 psxRegs.interrupt |= 1 << PSXINT_RCNT;
/*
 * Rebuild the dynarec-side event state after a savestate has been loaded.
 *
 * event_cycles[] is recomputed from psxRegs.intCycle[] (start cycle +
 * delay), the root counter event is recomputed from psxNextsCounter +
 * psxNextCounter and flagged pending, unknown bits above PSXINT_COUNT are
 * masked out of psxRegs.interrupt, and the memory layer is told to reload
 * its state.
 */
135 void new_dyna_restore(void)
138 for (i = 0; i < PSXINT_COUNT; i++)
139 event_cycles[i] = psxRegs.intCycle[i].sCycle + psxRegs.intCycle[i].cycle;
141 event_cycles[PSXINT_RCNT] = psxNextsCounter + psxNextCounter;
142 psxRegs.interrupt |= 1 << PSXINT_RCNT;
/* keep only the bits that correspond to known PSXINT_* events */
143 psxRegs.interrupt &= (1 << PSXINT_COUNT) - 1;
145 new_dyna_pcsx_mem_load_state();
/*
 * GTE operation handlers, indexed by the 6-bit operation number (0..63).
 * Filled in at run time by ari64_init().
 */
149 void *gte_handlers[64];
/*
 * "_nf" variants of the GTE handlers, same indexing; NULL marks operation
 * numbers without a handler.  The trailing comment on each row is the
 * index of its first entry (hex).
 * NOTE(review): "_nf" presumably stands for "no flags" - confirm.
 */
151 void *gte_handlers_nf[64] = {
152 NULL , gteRTPS_nf , NULL , NULL , NULL , NULL , gteNCLIP_nf, NULL , // 00
153 NULL , NULL , NULL , NULL , gteOP_nf , NULL , NULL , NULL , // 08
154 gteDPCS_nf, gteINTPL_nf, gteMVMVA_nf, gteNCDS_nf, gteCDP_nf, NULL , gteNCDT_nf , NULL , // 10
155 NULL , NULL , NULL , gteNCCS_nf, gteCC_nf , NULL , gteNCS_nf , NULL , // 18
156 gteNCT_nf , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20
157 gteSQR_nf , gteDCPL_nf , gteDPCT_nf , NULL , NULL , gteAVSZ3_nf, gteAVSZ4_nf, NULL , // 28
158 gteRTPT_nf, NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30
159 NULL , NULL , NULL , NULL , NULL , gteGPF_nf , gteGPL_nf , gteNCCT_nf, // 38
/*
 * Mnemonics of the GTE operations, indexed by operation number with the
 * same layout as gte_handlers_nf (despite the "regnames" name the entries
 * are operation names); NULL marks unused operation numbers.
 */
162 const char *gte_regnames[64] = {
163 NULL , "RTPS" , NULL , NULL , NULL , NULL , "NCLIP", NULL , // 00
164 NULL , NULL , NULL , NULL , "OP" , NULL , NULL , NULL , // 08
165 "DPCS", "INTPL", "MVMVA", "NCDS", "CDP", NULL , "NCDT" , NULL , // 10
166 NULL , NULL , NULL , "NCCS", "CC" , NULL , "NCS" , NULL , // 18
167 "NCT" , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20
168 "SQR" , "DCPL" , "DPCT" , NULL , NULL , "AVSZ3", "AVSZ4", NULL , // 28
169 "RTPT", NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30
170 NULL , NULL , NULL , NULL , NULL , "GPF" , "GPL" , "NCCT", // 38
/*
 * Cycle cost of each GTE operation, indexed by operation number
 * (16 entries per row); 0 for operation numbers that have no handler in
 * the tables above.
 */
173 /* from gte.txt.. not sure if this is any good. */
174 const char gte_cycletab[64] = {
175 /* 1 2 3 4 5 6 7 8 9 a b c d e f */
176 0, 15, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 6, 0, 0, 0,
177 8, 8, 8, 19, 13, 0, 44, 0, 0, 0, 0, 17, 11, 0, 14, 0,
178 30, 0, 0, 0, 0, 0, 0, 0, 5, 8, 17, 0, 0, 5, 6, 0,
179 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 39,
/*
 * Mask builders for the GTE register tables: GCBITSn / GDBITSn OR together
 * the single-register masks GCBIT(r) / GDBIT(r) of n register numbers.
 * GCBIT() and GDBIT() themselves are defined outside this block.
 */
#define GCBITS3(r0,r1,r2) \
	(GCBIT(r0) | GCBIT(r1) | GCBIT(r2))

#define GDBITS2(r0,r1) \
	(GDBIT(r0) | GDBIT(r1))
#define GDBITS3(r0,r1,r2) \
	(GDBIT(r0) | GDBIT(r1) | GDBIT(r2))
#define GDBITS4(r0,r1,r2,r3) \
	(GDBIT(r0) | GDBIT(r1) | GDBIT(r2) | GDBIT(r3))
#define GDBITS5(r0,r1,r2,r3,r4) \
	(GDBIT(r0) | GDBIT(r1) | GDBIT(r2) | GDBIT(r3) | GDBIT(r4))
#define GDBITS6(r0,r1,r2,r3,r4,r5) \
	(GDBIT(r0) | GDBIT(r1) | GDBIT(r2) | GDBIT(r3) | GDBIT(r4) | \
	 GDBIT(r5))
#define GDBITS7(r0,r1,r2,r3,r4,r5,r6) \
	(GDBIT(r0) | GDBIT(r1) | GDBIT(r2) | GDBIT(r3) | GDBIT(r4) | \
	 GDBIT(r5) | GDBIT(r6))
#define GDBITS8(r0,r1,r2,r3,r4,r5,r6,r7) \
	(GDBIT(r0) | GDBIT(r1) | GDBIT(r2) | GDBIT(r3) | GDBIT(r4) | \
	 GDBIT(r5) | GDBIT(r6) | GDBIT(r7))
#define GDBITS9(r0,r1,r2,r3,r4,r5,r6,r7,r8) \
	(GDBIT(r0) | GDBIT(r1) | GDBIT(r2) | GDBIT(r3) | GDBIT(r4) | \
	 GDBIT(r5) | GDBIT(r6) | GDBIT(r7) | GDBIT(r8))
#define GDBITS10(r0,r1,r2,r3,r4,r5,r6,r7,r8,r9) \
	(GDBIT(r0) | GDBIT(r1) | GDBIT(r2) | GDBIT(r3) | GDBIT(r4) | \
	 GDBIT(r5) | GDBIT(r6) | GDBIT(r7) | GDBIT(r8) | GDBIT(r9))
/*
 * Per-operation 64-bit register mask, indexed by GTE_* operation number;
 * judging by the name, the set of GTE registers each operation reads.
 * Masks are built from GDBIT()/GCBIT() register bits, with the literal
 * constants covering ranges of bits above bit 31.
 * NOTE(review): GDBIT/GCBIT are defined outside this excerpt; presumably
 * data registers occupy bits 0-31 and control registers bits 32-63 - confirm.
 */
207 const uint64_t gte_reg_reads[64] = {
208 [GTE_RTPS] = 0x1f0000ff00000000ll | GDBITS7(0,1,13,14,17,18,19),
209 [GTE_NCLIP] = GDBITS3(12,13,14),
210 [GTE_OP] = GCBITS3(0,2,4) | GDBITS3(9,10,11),
211 [GTE_DPCS] = GCBITS3(21,22,23) | GDBITS4(6,8,21,22),
212 [GTE_INTPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22),
213 [GTE_MVMVA] = 0x00ffffff00000000ll | GDBITS6(0,1,2,3,4,5), // XXX: maybe decode further?
214 [GTE_NCDS] = 0x00ffff0000000000ll | GDBITS5(0,1,6,21,22),
215 [GTE_CDP] = 0x00fff00000000000ll | GDBITS7(6,8,9,10,11,21,22),
216 [GTE_NCDT] = 0x00ffff0000000000ll | GDBITS8(0,1,2,3,4,5,6,8),
217 [GTE_NCCS] = 0x00ffff0000000000ll | GDBITS6(0,1,6,8,21,22),
218 [GTE_CC] = 0x001fe00000000000ll | GDBITS6(6,9,10,11,21,22),
219 [GTE_NCS] = 0x001fff0000000000ll | GDBITS4(0,1,21,22),
220 [GTE_NCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
221 [GTE_SQR] = GDBITS3(9,10,11),
222 [GTE_DCPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22),
223 [GTE_DPCT] = GCBITS3(21,22,23) | GDBITS4(8,20,21,22),
224 [GTE_AVSZ3] = GCBIT(29) | GDBITS3(17,18,19),
225 [GTE_AVSZ4] = GCBIT(30) | GDBITS4(16,17,18,19),
226 [GTE_RTPT] = 0x1f0000ff00000000ll | GDBITS7(0,1,2,3,4,5,19),
227 [GTE_GPF] = GDBITS7(6,8,9,10,11,21,22),
228 [GTE_GPL] = GDBITS10(6,8,9,10,11,21,22,25,26,27),
229 [GTE_NCCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
/*
 * Per-operation 64-bit register mask, indexed by GTE_* operation number;
 * judging by the name, the set of GTE registers each operation writes.
 * Every entry uses only GDBIT() bits or a literal that fits in 32 bits.
 */
232 // note: this excludes gteFLAG that is always written to
233 const uint64_t gte_reg_writes[64] = {
234 [GTE_RTPS] = 0x0f0f7f00ll,
235 [GTE_NCLIP] = GDBIT(24),
236 [GTE_OP] = GDBITS6(9,10,11,25,26,27),
237 [GTE_DPCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
238 [GTE_INTPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
239 [GTE_MVMVA] = GDBITS6(9,10,11,25,26,27),
240 [GTE_NCDS] = GDBITS9(9,10,11,20,21,22,25,26,27),
241 [GTE_CDP] = GDBITS9(9,10,11,20,21,22,25,26,27),
242 [GTE_NCDT] = GDBITS9(9,10,11,20,21,22,25,26,27),
243 [GTE_NCCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
244 [GTE_CC] = GDBITS9(9,10,11,20,21,22,25,26,27),
245 [GTE_NCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
246 [GTE_NCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
247 [GTE_SQR] = GDBITS6(9,10,11,25,26,27),
248 [GTE_DCPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
249 [GTE_DPCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
250 [GTE_AVSZ3] = GDBITS2(7,24),
251 [GTE_AVSZ4] = GDBITS2(7,24),
252 [GTE_RTPT] = 0x0f0f7f00ll,
253 [GTE_GPF] = GDBITS9(9,10,11,20,21,22,25,26,27),
254 [GTE_GPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
255 [GTE_NCCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
/*
 * Initialise the dynarec backend.
 *
 * Initialises the memory layer, then fills gte_handlers[] from the
 * interpreter's psxCP2[] table (entries equal to psxNULL are left as they
 * are).  Unless DRC_DBG is defined, selected entries are replaced with
 * ARM assembly / NEON implementations.  The memcpy() line overwrites the
 * whole gte_handlers_nf[] table with gte_handlers[].
 *
 * NOTE(review): several preprocessor lines (#ifdef/#else/#endif), the
 * declaration of `i`, the new_dynarec_init() call (if any) and the return
 * statement are not visible in this excerpt, so which of the assignments
 * below are active in which configuration cannot be told from here;
 * presumably the memcpy() belongs to the DRC_DBG branch - confirm.
 */
258 static int ari64_init()
260 extern void (*psxCP2[64])();
261 extern void psxNULL();
265 new_dyna_pcsx_mem_init();
267 for (i = 0; i < ARRAY_SIZE(gte_handlers); i++)
268 if (psxCP2[i] != psxNULL)
269 gte_handlers[i] = psxCP2[i];
271 #if !defined(DRC_DBG)
273 gte_handlers[0x06] = gteNCLIP_arm;
274 gte_handlers_nf[0x01] = gteRTPS_nf_arm;
275 gte_handlers_nf[0x30] = gteRTPT_nf_arm;
278 // compiler's _nf version is still a lot slower than neon
279 // _nf_arm RTPS is roughly the same, RTPT slower
280 gte_handlers[0x01] = gte_handlers_nf[0x01] = gteRTPS_neon;
281 gte_handlers[0x30] = gte_handlers_nf[0x30] = gteRTPT_neon;
285 memcpy(gte_handlers_nf, gte_handlers, sizeof(gte_handlers_nf));
288 zeromem_ptr = zero_mem;
/*
 * Reset the dynarec backend: prints a trace message, resets the memory
 * layer, invalidates every translated page and sets pending_exception.
 */
293 static void ari64_reset()
295 printf("ari64_reset\n");
296 new_dyna_pcsx_mem_reset();
297 invalidate_all_pages();
299 pending_exception = 1;
/*
 * Run one stretch of emulated code: computes the next stop point with
 * schedule_timeslice(), then logs entry and exit state when evprintf is
 * enabled.
 * NOTE(review): the call that actually enters translated code sits
 * between the two trace prints on a line not visible in this excerpt.
 */
302 // execute until predefined leave points
303 // (HLE softcall exit and BIOS fastboot end)
304 static void ari64_execute_until()
306 schedule_timeslice();
308 evprintf("ari64_execute %08x, %u->%u (%d)\n", psxRegs.pc,
309 psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle);
313 evprintf("ari64_execute end %08x, %u->%u (%d)\n", psxRegs.pc,
314 psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle);
/*
 * Main execution entry point: calls ari64_execute_until() and logs the PC
 * at which the dynarec returned.
 * NOTE(review): the surrounding loop construct and its exit condition are
 * not visible in this excerpt.
 */
317 static void ari64_execute()
320 ari64_execute_until();
321 evprintf("drc left @%08x\n", psxRegs.pc);
/*
 * Invalidate translated code covering a memory range that was modified.
 *
 * addr: start address of the modified range.
 * size: length in 32-bit words (converted to bytes here).
 *
 * Every 4 KiB page up to and including the page of (addr + size) is
 * passed to invalidate_block(); for addresses in the 0x80000000 main-RAM
 * window, pages already flagged in invalid_code[] are skipped.
 * NOTE(review): the assignment of `start` is on a line not visible in
 * this excerpt - presumably addr >> 12.
 */
325 static void ari64_clear(u32 addr, u32 size)
327 u32 start, end, main_ram;
329 size *= 4; /* PCSX uses DMA units */
331 evprintf("ari64_clear %08x %04x\n", addr, size);
333 /* check for RAM mirrors */
334 main_ram = (addr & 0xffe00000) == 0x80000000;
337 end = (addr + size) >> 12;
339 for (; start <= end; start++)
340 if (!main_ram || !invalid_code[start])
341 invalidate_block(start);
/* Shut the dynarec backend down by releasing its resources via new_dynarec_cleanup(). */
344 static void ari64_shutdown()
346 new_dynarec_cleanup();
/*
 * Interpreter entry points defined outside this file.  The two #defines
 * alias the "T" variants to the plain functions.
 * NOTE(review): the aliases presumably sit inside a preprocessor
 * conditional whose #if/#endif lines are not visible here - confirm.
 */
349 extern void intExecute();
350 extern void intExecuteT();
351 extern void intExecuteBlock();
352 extern void intExecuteBlockT();
354 #define intExecuteT intExecute
355 #define intExecuteBlockT intExecuteBlock
/*
 * No-op versions of the instruction trace / compare hooks.
 * Declared with (void) so the definitions are real prototypes and match
 * the full implementations of the same functions later in this file.
 * NOTE(review): presumably selected by a preprocessor conditional whose
 * #if/#else lines are not visible here.
 */
void do_insn_trace(void) {}
void do_insn_cmp(void) {}
/*
 * Fallback for x86 / x86-64 builds: defines the globals and empty
 * functions that this file references from the dynarec, so that the file
 * builds and links there.
 * NOTE(review): presumably the real recompiler is not built for these
 * targets - confirm.  The matching #endif is not visible in this excerpt.
 */
378 #if defined(__x86_64__) || defined(__i386__)
379 unsigned int address;
380 int pending_exception, stop;
381 unsigned int next_interupt;
382 int new_dynarec_did_compile;
383 int cycle_multiplier;
384 int new_dynarec_hacks;
/* empty stand-ins for the dynarec / memory-layer entry points */
388 void new_dynarec_init() {}
389 void new_dyna_start() {}
390 void new_dynarec_cleanup() {}
391 void new_dynarec_clear_full() {}
392 void invalidate_all_pages() {}
393 void invalidate_block(unsigned int block) {}
394 void new_dyna_pcsx_mem_init(void) {}
395 void new_dyna_pcsx_mem_reset(void) {}
396 void new_dyna_pcsx_mem_load_state(void) {}
403 extern u32 last_io_addr;
/*
 * Write `size` bytes starting at `mem` to the file `fname` (debug helper).
 *
 * If `fname` cannot be opened for writing and it contains a '/', the dump
 * is retried under the bare file name (the part after the last '/'), i.e.
 * relative to the current working directory.  If no file can be opened,
 * the dump is skipped with a message on stderr rather than handing a NULL
 * stream to fwrite().  A short write is reported on stderr as well.
 */
static void dump_mem(const char *fname, void *mem, size_t size)
{
	FILE *f1 = fopen(fname, "wb");

	if (f1 == NULL) {
		/* strrchr() returns NULL when there is no directory part to strip */
		const char *slash = strrchr(fname, '/');

		if (slash != NULL)
			f1 = fopen(slash + 1, "wb");
	}
	if (f1 == NULL) {
		fprintf(stderr, "dump_mem: can't open %s\n", fname);
		return;
	}

	if (fwrite(mem, 1, size, f1) != size)
		fprintf(stderr, "dump_mem: short write to %s\n", fname);
	fclose(f1);
}
/*
 * Read the aligned 32-bit word containing emulated address `a`, for trace
 * comparison.
 *
 * The region is chosen by the upper 16 address bits: 0x1f80 reads from
 * psxH, 0x1f00 reads from psxP (both using the low 16 bits, word-aligned);
 * everything else reads from psxM using the low 21 bits, word-aligned, so
 * no range check is applied to other addresses.
 */
414 static u32 memcheck_read(u32 a)
416 if ((a >> 16) == 0x1f80)
418 return *(u32 *)(psxH + (a & 0xfffc));
420 if ((a >> 16) == 0x1f00)
422 return *(u32 *)(psxP + (a & 0xfffc));
424 // if ((a & ~0xe0600000) < 0x200000)
426 return *(u32 *)(psxM + (a & 0x1ffffc));
/*
 * Append the state changes since the previous call to the binary file
 * "tracelog" (the recording side of the trace; do_insn_cmp() is the
 * reader).
 *
 * For every 32-bit word of psxRegs before the intCycle member that
 * differs from the shadow copy, the new value is written and the shadow
 * updated.  Changed event_cycles[] entries, a changed last_io_addr and a
 * changed memory word at last_io_addr are written as a one-byte tag
 * followed by a 4-byte value, and a final one-byte tag closes the record.
 * At cycle 190230 RAM and the psxH area are additionally dumped to
 * hard-coded paths.
 *
 * NOTE(review): the declarations of `f`, `byte`, `i`, `io_data`, the tag
 * values and the one-time guard around fopen() are on lines not visible
 * in this excerpt; the result of fopen() does not appear to be checked.
 */
429 void do_insn_trace(void)
431 static psxRegisters oldregs;
432 static u32 old_io_addr = (u32)-1;
433 static u32 old_io_data = 0xbad0c0de;
434 static u32 event_cycles_o[PSXINT_COUNT];
/* view the register files as flat arrays of 32-bit words */
435 u32 *allregs_p = (void *)&psxRegs;
436 u32 *allregs_o = (void *)&oldregs;
441 //last_io_addr = 0x5e2c8;
443 f = fopen("tracelog", "wb");
446 oldregs.code = psxRegs.code; // don't care
447 for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
448 if (allregs_p[i] != allregs_o[i]) {
450 fwrite(&allregs_p[i], 1, 4, f);
451 allregs_o[i] = allregs_p[i];
455 for (i = 0; i < PSXINT_COUNT; i++) {
456 if (event_cycles[i] != event_cycles_o[i]) {
458 fwrite(&byte, 1, 1, f);
460 fwrite(&event_cycles[i], 1, 4, f);
461 event_cycles_o[i] = event_cycles[i];
465 if (old_io_addr != last_io_addr) {
467 fwrite(&byte, 1, 1, f);
468 fwrite(&last_io_addr, 1, 4, f);
469 old_io_addr = last_io_addr;
471 io_data = memcheck_read(last_io_addr);
472 if (old_io_data != io_data) {
474 fwrite(&byte, 1, 1, f);
475 fwrite(&io_data, 1, 4, f);
476 old_io_data = io_data;
479 fwrite(&byte, 1, 1, f);
482 if (psxRegs.cycle == 190230) {
483 dump_mem("/mnt/ntz/dev/pnd/tmp/psxram_i.dump", psxM, 0x200000);
484 dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs_i.dump", psxH, 0x10000);
/*
 * Printable name for each 32-bit word of psxRegisters that precedes the
 * intCycle member, in memory order; used by do_insn_cmp() to label
 * mismatching words.
 * NOTE(review): at least one short initializer line (between "r31" and
 * "C0_0") is not visible in this excerpt.
 */
491 static const char *regnames[offsetof(psxRegisters, intCycle) / 4] = {
492 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
493 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
494 "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
495 "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
497 "C0_0", "C0_1", "C0_2", "C0_3", "C0_4", "C0_5", "C0_6", "C0_7",
498 "C0_8", "C0_9", "C0_10", "C0_11", "C0_12", "C0_13", "C0_14", "C0_15",
499 "C0_16", "C0_17", "C0_18", "C0_19", "C0_20", "C0_21", "C0_22", "C0_23",
500 "C0_24", "C0_25", "C0_26", "C0_27", "C0_28", "C0_29", "C0_30", "C0_31",
502 "C2D0", "C2D1", "C2D2", "C2D3", "C2D4", "C2D5", "C2D6", "C2D7",
503 "C2D8", "C2D9", "C2D10", "C2D11", "C2D12", "C2D13", "C2D14", "C2D15",
504 "C2D16", "C2D17", "C2D18", "C2D19", "C2D20", "C2D21", "C2D22", "C2D23",
505 "C2D24", "C2D25", "C2D26", "C2D27", "C2D28", "C2D29", "C2D30", "C2D31",
507 "C2C0", "C2C1", "C2C2", "C2C3", "C2C4", "C2C5", "C2C6", "C2C7",
508 "C2C8", "C2C9", "C2C10", "C2C11", "C2C12", "C2C13", "C2C14", "C2C15",
509 "C2C16", "C2C17", "C2C18", "C2C19", "C2C20", "C2C21", "C2C22", "C2C23",
510 "C2C24", "C2C25", "C2C26", "C2C27", "C2C28", "C2C29", "C2C30", "C2C31",
512 "PC", "code", "cycle", "interrupt",
/*
 * Ring buffer of recent register mismatches.  miss_log_i is the next slot
 * to write; the index is wrapped with a bit mask, so the element count of
 * miss_log[] must be a power of two.
 * NOTE(review): the definition of miss_log[] is not visible in this excerpt.
 */
520 static int miss_log_i;
521 #define miss_log_len (sizeof(miss_log)/sizeof(miss_log[0]))
522 #define miss_log_mask (miss_log_len-1)
/*
 * Record one mismatch (register index, actual value, expected value, and
 * the PC / cycle at which it was seen) in the current slot, then advance
 * the write index, overwriting the oldest entry once the buffer is full.
 */
524 static void miss_log_add(int reg, u32 val, u32 val_expect, u32 pc, u32 cycle)
526 miss_log[miss_log_i].reg = reg;
527 miss_log[miss_log_i].val = val;
528 miss_log[miss_log_i].val_expect = val_expect;
529 miss_log[miss_log_i].pc = pc;
530 miss_log[miss_log_i].cycle = cycle;
531 miss_log_i = (miss_log_i + 1) & miss_log_mask;
/*
 * Compare the current emulator state with the next record of the binary
 * file "tracelog" (the checking side of the trace written by
 * do_insn_trace()).
 *
 * Reads tagged items from the file into the expected register image
 * `rregs`, the expected event cycle, and the expected memory
 * address/value.  psxRegs.code, psxRegs.cycle and CP0 register 9 are
 * copied from the expected image before comparing, so they never count
 * as mismatches.  When the register image or the checked memory word
 * differs, the differing words are pushed into the miss log and messages
 * are printed; on the failure path the miss log, general registers and PC
 * are printed and RAM / the psxH area are dumped to hard-coded paths.
 * Finally psxRegs.cycle is set to the expected cycle + 2.
 *
 * NOTE(review): many short lines (declarations of `f`, `code`,
 * `ev_cycles`, the record-parsing switch, returns / exit) are not visible
 * in this excerpt, and the fread() results after the first are not
 * visibly checked.
 * NOTE(review): `which_event` is an int initialised to -1 and only one
 * byte of it is filled by fread(); unless a hidden line zeroes it first,
 * the remaining bytes stay 0xff and the later `which_event >= 0` test can
 * never succeed - verify.
 */
536 void do_insn_cmp(void)
538 static psxRegisters rregs;
539 static u32 mem_addr, mem_val;
540 u32 *allregs_p = (void *)&psxRegs;
541 u32 *allregs_e = (void *)&rregs;
542 static u32 ppc, failcount;
543 int i, ret, bad = 0, which_event = -1;
548 f = fopen("tracelog", "rb");
551 if ((ret = fread(&code, 1, 1, f)) <= 0)
560 fread(&which_event, 1, 1, f);
561 fread(&ev_cycles, 1, 4, f);
564 fread(&mem_addr, 1, 4, f);
567 fread(&mem_val, 1, 4, f);
570 fread(&allregs_e[code], 1, 4, f);
578 psxRegs.code = rregs.code; // don't care
579 psxRegs.cycle = rregs.cycle;
580 psxRegs.CP0.r[9] = rregs.CP0.r[9]; // Count
582 //if (psxRegs.cycle == 166172) breakme();
584 if (memcmp(&psxRegs, &rregs, offsetof(psxRegisters, intCycle)) == 0 &&
585 mem_val == memcheck_read(mem_addr)
591 for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
592 if (allregs_p[i] != allregs_e[i]) {
593 miss_log_add(i, allregs_p[i], allregs_e[i], psxRegs.pc, psxRegs.cycle);
598 if (mem_val != memcheck_read(mem_addr)) {
599 printf("bad mem @%08x: %08x %08x\n", mem_addr, memcheck_read(mem_addr), mem_val);
603 if (which_event >= 0 && event_cycles[which_event] != ev_cycles) {
604 printf("bad ev_cycles #%d: %08x %08x\n", which_event, event_cycles[which_event], ev_cycles);
608 if (psxRegs.pc == rregs.pc && bad < 6 && failcount < 32) {
609 static int last_mcycle;
/* progress print each time cycle >> 20 changes (about every 2^20 cycles) */
610 if (last_mcycle != psxRegs.cycle >> 20) {
611 printf("%u\n", psxRegs.cycle);
612 last_mcycle = psxRegs.cycle >> 20;
/* print the whole miss log, starting at the current write index */
619 for (i = 0; i < miss_log_len; i++, miss_log_i = (miss_log_i + 1) & miss_log_mask)
620 printf("bad %5s: %08x %08x, pc=%08x, cycle %u\n",
621 regnames[miss_log[miss_log_i].reg], miss_log[miss_log_i].val,
622 miss_log[miss_log_i].val_expect, miss_log[miss_log_i].pc, miss_log[miss_log_i].cycle);
623 printf("-- %d\n", bad);
624 for (i = 0; i < 8; i++)
625 printf("r%d=%08x r%2d=%08x r%2d=%08x r%2d=%08x\n", i, allregs_p[i],
626 i+8, allregs_p[i+8], i+16, allregs_p[i+16], i+24, allregs_p[i+24]);
627 printf("PC: %08x/%08x, cycle %u\n", psxRegs.pc, ppc, psxRegs.cycle);
628 dump_mem("/mnt/ntz/dev/pnd/tmp/psxram.dump", psxM, 0x200000);
629 dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs.dump", psxH, 0x10000);
632 psxRegs.cycle = rregs.cycle + 2; // sync timing