2 * (C) Gražvydas "notaz" Ignotas, 2010-2011
4 * This work is licensed under the terms of GNU GPL version 2 or later.
5 * See the COPYING file in the top-level directory.
12 #include "../psxhle.h"
13 #include "../r3000a.h"
15 #include "../psxdma.h"
17 #include "../gte_arm.h"
18 #include "../gte_neon.h"
22 #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
24 //#define evprintf printf
/* NOTE(review): presumably one flag byte per 4KB page (0x100000 entries);
 * ari64_clear() skips invalidate_block() for main-RAM entries whose flag is
 * already non-zero -- confirm the exact meaning against the dynarec. */
27 char invalid_code[0x100000];
/* Absolute cycle at which each PSXINT_* event is due; compared against
 * psxRegs.cycle by schedule_timeslice() and irq_test(). */
28 u32 event_cycles[PSXINT_COUNT];
/*
 * Chooses the cycle at which execution must stop next and stores it in
 * next_interupt as an absolute cycle count (current cycle + smallest distance).
 *
 * NOTE(review): this excerpt does not show the braces, the declarations of
 * min/dif/sum, the statement guarded by the `if` inside the loop, or the
 * lines surrounding the statistics code at the end.
 */
30 static void schedule_timeslice(void)
32 u32 i, c = psxRegs.cycle;
// initial candidate: distance from now to psxNextsCounter + psxNextCounter
35 min = psxNextsCounter + psxNextCounter - c;
// then check the distance to every scheduled event
36 for (i = 0; i < ARRAY_SIZE(event_cycles); i++) {
37 dif = event_cycles[i] - c;
38 //evprintf(" ev %d\n", dif);
// only a strictly positive distance smaller than the current candidate passes
// (presumably min is lowered to dif here -- the guarded statement is not visible)
39 if (0 < dif && dif < min)
42 next_interupt = c + min;
// debug statistics: adds up the cycles elapsed since this code last ran and
// prints sum / cnt whenever the low 8 bits of cnt are zero
// NOTE(review): where cnt is incremented and whether this is compiled in
// is not visible here
45 static u32 cnt, last_cycle;
49 sum += psxRegs.cycle - last_cycle;
50 if ((cnt & 0xff) == 0)
51 printf("%u\n", (u32)(sum / cnt));
53 last_cycle = psxRegs.cycle;
/* Type of an event handler: returns nothing, parameter list left unspecified. */
57 typedef void (irq_func)();
/* Event handlers, indexed by PSXINT_* id through designated initializers.
 * Any index without an initializer is a null pointer. */
59 static irq_func * const irq_funcs[] = {
60 [PSXINT_SIO] = sioInterrupt,
61 [PSXINT_CDR] = cdrInterrupt,
62 [PSXINT_CDREAD] = cdrReadInterrupt,
63 [PSXINT_GPUDMA] = gpuInterrupt,
64 [PSXINT_MDECOUTDMA] = mdec1Interrupt,
65 [PSXINT_SPUDMA] = spuInterrupt,
66 [PSXINT_MDECINDMA] = mdec0Interrupt,
67 [PSXINT_GPUOTCDMA] = gpuotcInterrupt,
68 [PSXINT_CDRDMA] = cdrDmaInterrupt,
69 [PSXINT_CDRLID] = cdrLidSeekInterrupt,
70 [PSXINT_CDRPLAY] = cdrPlayInterrupt,
73 /* local dupe of psxBranchTest, using event_cycles */
/*
 * Walks the pending-event bitmask, tests each pending event against its
 * deadline in event_cycles[], and finally raises a CPU exception if the
 * interrupt condition below holds.
 *
 * NOTE(review): the braces, the declarations of irq/irq_bits, the statement
 * guarded by the counter check and the body of the "event is due" branch are
 * not visible in this excerpt.
 */
74 static void irq_test(void)
// snapshot of the pending-event bits and of the current cycle
76 u32 irqs = psxRegs.interrupt;
77 u32 cycle = psxRegs.cycle;
// counter deadline reached (the action taken is not visible here)
80 if ((psxRegs.cycle - psxNextsCounter) >= psxNextCounter)
83 // irq_funcs() may queue more irqs
84 psxRegs.interrupt = 0;
// visit bit positions from bit 0 upwards until no set bits remain
86 for (irq = 0, irq_bits = irqs; irq_bits != 0; irq++, irq_bits >>= 1) {
// signed difference: true once `cycle` has reached the event's deadline,
// and still correct when the 32-bit cycle counter wraps around
89 if ((s32)(cycle - event_cycles[irq]) >= 0) {
// merge the local mask back into whatever was queued in the meantime
// (presumably serviced bits were cleared from irqs above -- not visible)
94 psxRegs.interrupt |= irqs;
// some bit is set in both hardware registers 0x1070 and 0x1074, and both
// bits of mask 0x401 are set in Status: raise exception 0x400
96 if ((psxHu32(0x1070) & psxHu32(0x1074)) && (Status & 0x401) == 0x401) {
97 psxException(0x400, 0);
98 pending_exception = 1;
104 evprintf(" +ge %08x, %u->%u\n", psxRegs.pc, psxRegs.cycle, next_interupt);
108 //pending_exception = 1;
110 schedule_timeslice();
112 evprintf(" -ge %08x, %u->%u (%d)\n", psxRegs.pc, psxRegs.cycle,
113 next_interupt, next_interupt - psxRegs.cycle);
117 extern void MTC0(int reg, u32 val);
/* MTC0 hook taking a register number and the value written.
 * NOTE(review): only the debug trace is visible in this excerpt; the rest of
 * the body is missing. */
119 void pcsx_mtc0(u32 reg, u32 val)
121 evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
/* Second MTC0 hook with the same signature and the same debug trace as
 * pcsx_mtc0(). NOTE(review): presumably the delay-slot variant ("_ds"); the
 * rest of the body is not visible in this excerpt. */
126 void pcsx_mtc0_ds(u32 reg, u32 val)
128 evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
/* Save-state hook; the only visible content is the remark below, i.e. no
 * conversion work is shown for saving. */
132 void new_dyna_save(void)
134 // psxRegs.intCycle is always maintained, no need to convert
/* Load-state hook: rebuilds event_cycles[] from the data kept in psxRegs and
 * then lets the memory layer reload its state. */
137 void new_dyna_restore(void)
// absolute deadline = cycle the event was scheduled at + its delay
140 for (i = 0; i < PSXINT_COUNT; i++)
141 event_cycles[i] = psxRegs.intCycle[i].sCycle + psxRegs.intCycle[i].cycle;
143 new_dyna_pcsx_mem_load_state();
/* GTE operation handlers, 64 slots; filled from psxCP2[] in ari64_init(). */
147 void *gte_handlers[64];
/* Alternative handler table with the "_nf" variants of each operation.
 * NOTE(review): "_nf" presumably means "no flags" -- confirm in the gte code.
 * The trailing comment on each row is the index of its first entry. */
149 void *gte_handlers_nf[64] = {
150 NULL , gteRTPS_nf , NULL , NULL , NULL , NULL , gteNCLIP_nf, NULL , // 00
151 NULL , NULL , NULL , NULL , gteOP_nf , NULL , NULL , NULL , // 08
152 gteDPCS_nf, gteINTPL_nf, gteMVMVA_nf, gteNCDS_nf, gteCDP_nf, NULL , gteNCDT_nf , NULL , // 10
153 NULL , NULL , NULL , gteNCCS_nf, gteCC_nf , NULL , gteNCS_nf , NULL , // 18
154 gteNCT_nf , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20
155 gteSQR_nf , gteDCPL_nf , gteDPCT_nf , NULL , NULL , gteAVSZ3_nf, gteAVSZ4_nf, NULL , // 28
156 gteRTPT_nf, NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30
157 NULL , NULL , NULL , NULL , NULL , gteGPF_nf , gteGPL_nf , gteNCCT_nf, // 38
/* Name of the GTE operation in each of the 64 slots (NULL where there is
 * none); laid out slot-for-slot like gte_handlers_nf. Despite the identifier,
 * the strings are operation names, not register names. */
160 const char *gte_regnames[64] = {
161 NULL , "RTPS" , NULL , NULL , NULL , NULL , "NCLIP", NULL , // 00
162 NULL , NULL , NULL , NULL , "OP" , NULL , NULL , NULL , // 08
163 "DPCS", "INTPL", "MVMVA", "NCDS", "CDP", NULL , "NCDT" , NULL , // 10
164 NULL , NULL , NULL , "NCCS", "CC" , NULL , "NCS" , NULL , // 18
165 "NCT" , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20
166 "SQR" , "DCPL" , "DPCT" , NULL , NULL , "AVSZ3", "AVSZ4", NULL , // 28
167 "RTPT", NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30
168 NULL , NULL , NULL , NULL , NULL , "GPF" , "GPL" , "NCCT", // 38
171 /* from gte.txt.. not sure if this is any good. */
/* One value per GTE operation slot, 16 slots per row; non-zero entries sit at
 * the same slots that have a handler in gte_handlers_nf.
 * NOTE(review): presumably the cycle cost of each operation -- confirm. */
172 const char gte_cycletab[64] = {
173 /* 1 2 3 4 5 6 7 8 9 a b c d e f */
174 0, 15, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 6, 0, 0, 0,
175 8, 8, 8, 19, 13, 0, 44, 0, 0, 0, 0, 17, 11, 0, 14, 0,
176 30, 0, 0, 0, 0, 0, 0, 0, 5, 8, 17, 0, 0, 5, 6, 0,
177 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 39,
/* Helpers that OR together the GCBIT()/GDBIT() value of every listed index.
 * Each GDBITSn is built from GDBITS(n-1) plus one more GDBIT().
 * NOTE(review): GCBIT and GDBIT themselves are defined outside this excerpt;
 * presumably they select a GTE control / data register bit -- confirm. */
209 #define GCBITS3(b0,b1,b2) \
210 (GCBIT(b0) | GCBIT(b1) | GCBIT(b2))
211 #define GDBITS2(b0,b1) \
212 (GDBIT(b0) | GDBIT(b1))
213 #define GDBITS3(b0,b1,b2) \
214 (GDBITS2(b0,b1) | GDBIT(b2))
215 #define GDBITS4(b0,b1,b2,b3) \
216 (GDBITS3(b0,b1,b2) | GDBIT(b3))
217 #define GDBITS5(b0,b1,b2,b3,b4) \
218 (GDBITS4(b0,b1,b2,b3) | GDBIT(b4))
219 #define GDBITS6(b0,b1,b2,b3,b4,b5) \
220 (GDBITS5(b0,b1,b2,b3,b4) | GDBIT(b5))
221 #define GDBITS7(b0,b1,b2,b3,b4,b5,b6) \
222 (GDBITS6(b0,b1,b2,b3,b4,b5) | GDBIT(b6))
223 #define GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) \
224 (GDBITS7(b0,b1,b2,b3,b4,b5,b6) | GDBIT(b7))
225 #define GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) \
226 (GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) | GDBIT(b8))
227 #define GDBITS10(b0,b1,b2,b3,b4,b5,b6,b7,b8,b9) \
228 (GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) | GDBIT(b9))
/* Per-operation 64-bit mask, indexed by GTE_* operation id; operations not
 * listed get 0. Going by the name, a set bit marks a register the operation
 * reads.
 * NOTE(review): the bit layout depends on GCBIT/GDBIT, which are not visible
 * here. The raw hex literals only set bits in the upper 32 bits, presumably
 * the control-register half -- confirm before editing any mask. */
230 const uint64_t gte_reg_reads[64] = {
231 [GTE_RTPS] = 0x1f0000ff00000000ll | GDBITS7(0,1,13,14,17,18,19),
232 [GTE_NCLIP] = GDBITS3(12,13,14),
233 [GTE_OP] = GCBITS3(0,2,4) | GDBITS3(9,10,11),
234 [GTE_DPCS] = GCBITS3(21,22,23) | GDBITS4(6,8,21,22),
235 [GTE_INTPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22),
236 [GTE_MVMVA] = 0x00ffffff00000000ll | GDBITS6(0,1,2,3,4,5), // XXX: maybe decode further?
237 [GTE_NCDS] = 0x00ffff0000000000ll | GDBITS5(0,1,6,21,22),
238 [GTE_CDP] = 0x00fff00000000000ll | GDBITS7(6,8,9,10,11,21,22),
239 [GTE_NCDT] = 0x00ffff0000000000ll | GDBITS8(0,1,2,3,4,5,6,8),
240 [GTE_NCCS] = 0x00ffff0000000000ll | GDBITS6(0,1,6,8,21,22),
241 [GTE_CC] = 0x001fe00000000000ll | GDBITS6(6,9,10,11,21,22),
242 [GTE_NCS] = 0x001fff0000000000ll | GDBITS4(0,1,21,22),
243 [GTE_NCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
244 [GTE_SQR] = GDBITS3(9,10,11),
245 [GTE_DCPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22),
246 [GTE_DPCT] = GCBITS3(21,22,23) | GDBITS4(8,20,21,22),
247 [GTE_AVSZ3] = GCBIT(29) | GDBITS3(17,18,19),
248 [GTE_AVSZ4] = GCBIT(30) | GDBITS4(16,17,18,19),
249 [GTE_RTPT] = 0x1f0000ff00000000ll | GDBITS7(0,1,2,3,4,5,19),
250 [GTE_GPF] = GDBITS7(6,8,9,10,11,21,22),
251 [GTE_GPL] = GDBITS10(6,8,9,10,11,21,22,25,26,27),
252 [GTE_NCCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
255 // note: this excludes gteFLAG that is always written to
/* Per-operation 64-bit mask, indexed by GTE_* operation id, counterpart of
 * gte_reg_reads; going by the name, a set bit marks a register the operation
 * writes. Every entry here uses only GDBIT-based values or a literal that
 * fits in the low 32 bits.
 * NOTE(review): the bit layout depends on GDBIT, which is not visible here. */
256 const uint64_t gte_reg_writes[64] = {
257 [GTE_RTPS] = 0x0f0f7f00ll,
258 [GTE_NCLIP] = GDBIT(24),
259 [GTE_OP] = GDBITS6(9,10,11,25,26,27),
260 [GTE_DPCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
261 [GTE_INTPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
262 [GTE_MVMVA] = GDBITS6(9,10,11,25,26,27),
263 [GTE_NCDS] = GDBITS9(9,10,11,20,21,22,25,26,27),
264 [GTE_CDP] = GDBITS9(9,10,11,20,21,22,25,26,27),
265 [GTE_NCDT] = GDBITS9(9,10,11,20,21,22,25,26,27),
266 [GTE_NCCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
267 [GTE_CC] = GDBITS9(9,10,11,20,21,22,25,26,27),
268 [GTE_NCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
269 [GTE_NCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
270 [GTE_SQR] = GDBITS6(9,10,11,25,26,27),
271 [GTE_DCPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
272 [GTE_DPCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
273 [GTE_AVSZ3] = GDBITS2(7,24),
274 [GTE_AVSZ4] = GDBITS2(7,24),
275 [GTE_RTPT] = 0x0f0f7f00ll,
276 [GTE_GPF] = GDBITS9(9,10,11,20,21,22,25,26,27),
277 [GTE_GPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
278 [GTE_NCCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
/*
 * Initializes the memory layer and populates the GTE handler tables.
 *
 * NOTE(review): several lines of this function are not visible in this
 * excerpt, including some preprocessor directives (so the exact conditions
 * for the ARM/NEON overrides and for the final memcpy are unknown), other
 * calls, and the return statement.
 */
281 static int ari64_init()
283 extern void (*psxCP2[64])();
284 extern void psxNULL();
288 new_dyna_pcsx_mem_init();
// take over every CP2 handler that is not the psxNULL placeholder;
// skipped slots keep their previous (initially null) value
290 for (i = 0; i < ARRAY_SIZE(gte_handlers); i++)
291 if (psxCP2[i] != psxNULL)
292 gte_handlers[i] = psxCP2[i];
// optimized replacements, left out when DRC_DBG or PCNT is defined
294 #if !defined(DRC_DBG) && !defined(PCNT)
296 gte_handlers[0x06] = gteNCLIP_arm;
297 gte_handlers_nf[0x01] = gteRTPS_nf_arm;
298 gte_handlers_nf[0x30] = gteRTPT_nf_arm;
301 // compiler's _nf version is still a lot slower than neon
302 // _nf_arm RTPS is roughly the same, RTPT slower
// the NEON routines go into both tables for these three operations
303 gte_handlers[0x01] = gte_handlers_nf[0x01] = gteRTPS_neon;
304 gte_handlers[0x30] = gte_handlers_nf[0x30] = gteRTPT_neon;
305 gte_handlers[0x12] = gte_handlers_nf[0x12] = gteMVMVA_neon;
// overwrites the whole _nf table with the regular handlers
// (the condition under which this line is compiled is not visible here)
309 memcpy(gte_handlers_nf, gte_handlers, sizeof(gte_handlers_nf));
/* Reset hook: logs the call, resets the memory layer, invalidates all pages
 * and sets pending_exception.
 * NOTE(review): some lines of the body are not visible in this excerpt. */
316 static void ari64_reset()
318 printf("ari64_reset\n");
319 new_dyna_pcsx_mem_reset();
320 invalidate_all_pages();
322 pending_exception = 1;
325 // execute until predefined leave points
326 // (HLE softcall exit and BIOS fastboot end)
/* Recomputes next_interupt via schedule_timeslice() and traces the state
 * before and after the run.
 * NOTE(review): the call that actually starts execution (between the two
 * traces) is not visible in this excerpt. */
327 static void ari64_execute_until()
329 schedule_timeslice();
331 evprintf("ari64_execute %08x, %u->%u (%d)\n", psxRegs.pc,
332 psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle);
336 evprintf("ari64_execute end %08x, %u->%u (%d)\n", psxRegs.pc,
337 psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle);
/* Runs ari64_execute_until() and traces the PC each time it returns.
 * NOTE(review): the loop construct around these two statements, if any, is
 * not visible in this excerpt. */
340 static void ari64_execute()
343 ari64_execute_until();
344 evprintf("drc left @%08x\n", psxRegs.pc);
/*
 * Invalidates the translated code for every 4KB block touched by a range.
 *
 * addr: start address of the range.
 * size: length of the range; multiplied by 4 on entry (see comment below).
 *
 * NOTE(review): the line that initializes `start` is not visible in this
 * excerpt.
 */
348 static void ari64_clear(u32 addr, u32 size)
350 u32 start, end, main_ram;
352 size *= 4; /* PCSX uses DMA units */
354 evprintf("ari64_clear %08x %04x\n", addr, size);
356 /* check for RAM mirrors */
// non-zero only for addresses 0x80000000..0x801fffff
357 main_ram = (addr & 0xffe00000) == 0x80000000;
// block number of the first address past the range; the loop below includes it
360 end = (addr + size) >> 12;
// for main RAM, blocks whose invalid_code flag is already set are skipped;
// everything else is invalidated unconditionally
362 for (; start <= end; start++)
363 if (!main_ram || !invalid_code[start])
364 invalidate_block(start);
/* Shutdown hook: releases the dynarec through new_dynarec_cleanup(). */
367 static void ari64_shutdown()
369 new_dynarec_cleanup();
/* Interpreter entry points defined elsewhere; the "T" names are the ones the
 * two #defines below map back onto the plain variants.
 * NOTE(review): the preprocessor condition selecting between real "T"
 * functions and these aliases is not visible in this excerpt. */
372 extern void intExecute();
373 extern void intExecuteT();
374 extern void intExecuteBlock();
375 extern void intExecuteBlockT();
377 #define intExecuteT intExecute
378 #define intExecuteBlockT intExecuteBlock
/* Empty versions of the trace/compare hooks. Non-empty definitions with the
 * same names appear later in the file, so the two sets must sit in different
 * preprocessor branches (the directives are not visible in this excerpt). */
397 void do_insn_trace() {}
398 void do_insn_cmp() {}
/* x86 / x86-64 builds only: definitions of the globals and empty stand-ins
 * for the dynarec entry points that the rest of this file references.
 * NOTE(review): the matching #endif is not visible in this excerpt, so the
 * exact extent of the conditional section is unknown. */
401 #if defined(__x86_64__) || defined(__i386__)
402 unsigned int address, readmem_word, word;
403 unsigned short hword;
405 int pending_exception, stop;
406 unsigned int next_interupt;
407 int new_dynarec_did_compile;
408 int cycle_multiplier;
// every function below intentionally does nothing
410 void new_dynarec_init() {}
411 void new_dyna_start() {}
412 void new_dynarec_cleanup() {}
413 void new_dynarec_clear_full() {}
414 void invalidate_all_pages() {}
415 void invalidate_block(unsigned int block) {}
416 void new_dyna_pcsx_mem_init(void) {}
417 void new_dyna_pcsx_mem_reset(void) {}
418 void new_dyna_pcsx_mem_load_state(void) {}
425 extern u32 last_io_addr;
/*
 * Debug helper: writes `size` bytes starting at `mem` to the file `fname`.
 *
 * NOTE(review): the condition guarding the second fopen() and the code after
 * fwrite() are not visible in this excerpt.
 */
427 static void dump_mem(const char *fname, void *mem, size_t size)
429 FILE *f1 = fopen(fname, "wb");
// second attempt with only the part after the last '/', i.e. a file in the
// current directory
// NOTE(review): strrchr() returns NULL when fname contains no '/', and no
// NULL check on f1 is visible before fwrite() -- verify
431 f1 = fopen(strrchr(fname, '/') + 1, "wb");
432 fwrite(mem, 1, size, f1);
/*
 * Debug helper: returns the 32-bit word at emulated address `a`, with the
 * address rounded down to a multiple of 4 by the masks below.
 *
 * NOTE(review): braces and possibly other statements between the visible
 * lines are missing from this excerpt.
 */
436 static u32 memcheck_read(u32 a)
// upper 16 address bits 0x1f80: read from psxH, low 16 bits as offset
438 if ((a >> 16) == 0x1f80)
440 return *(u32 *)(psxH + (a & 0xfffc));
// upper 16 address bits 0x1f00: read from psxP, low 16 bits as offset
442 if ((a >> 16) == 0x1f00)
444 return *(u32 *)(psxP + (a & 0xfffc));
446 // if ((a & ~0xe0600000) < 0x200000)
// anything else: read from psxM, offset wrapped to 0x200000 bytes
448 return *(u32 *)(psxM + (a & 0x1ffffc));
/*
 * Trace writer: appends to the file "tracelog" what changed since the
 * previous call -- register words, the last I/O address and the data found at
 * that address.
 *
 * NOTE(review): the declarations of f, i, byte and io_data, the values
 * assigned to `byte` before each write, and all braces are not visible in
 * this excerpt, so the exact record format cannot be read off from here.
 */
451 void do_insn_trace(void)
// copy of the registers as they were when last written to the log
453 static psxRegisters oldregs;
454 static u32 old_io_addr = (u32)-1;
455 static u32 old_io_data = 0xbad0c0de;
// both register structs are scanned as flat arrays of 32-bit words
456 u32 *allregs_p = (void *)&psxRegs;
457 u32 *allregs_o = (void *)&oldregs;
462 //last_io_addr = 0x5e2c8;
464 f = fopen("tracelog", "wb");
466 oldregs.code = psxRegs.code; // don't care
// covers every word that precedes the intCycle member
467 for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
468 if (allregs_p[i] != allregs_o[i]) {
470 fwrite(&allregs_p[i], 1, 4, f);
471 allregs_o[i] = allregs_p[i];
// I/O address changed: one byte followed by the 4-byte address
474 if (old_io_addr != last_io_addr) {
476 fwrite(&byte, 1, 1, f);
477 fwrite(&last_io_addr, 1, 4, f);
478 old_io_addr = last_io_addr;
// data at that address changed: one byte followed by the 4-byte value
480 io_data = memcheck_read(last_io_addr);
481 if (old_io_data != io_data) {
483 fwrite(&byte, 1, 1, f);
484 fwrite(&io_data, 1, 4, f);
485 old_io_data = io_data;
488 fwrite(&byte, 1, 1, f);
// hard-coded debugging aid: dump psxM and psxH at one specific cycle
491 if (psxRegs.cycle == 190230) {
492 dump_mem("/mnt/ntz/dev/pnd/tmp/psxram_i.dump", psxM, 0x200000);
493 dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs_i.dump", psxH, 0x10000);
/* Display name for each 32-bit word of psxRegisters that precedes intCycle;
 * do_insn_cmp() indexes it with the word index of a mismatching register.
 * NOTE(review): some rows of the initializer are not visible in this excerpt,
 * so the visible names do not necessarily line up with the word indices as
 * shown. */
500 static const char *regnames[offsetof(psxRegisters, intCycle) / 4] = {
501 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
502 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
503 "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
504 "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
506 "C0_0", "C0_1", "C0_2", "C0_3", "C0_4", "C0_5", "C0_6", "C0_7",
507 "C0_8", "C0_9", "C0_10", "C0_11", "C0_12", "C0_13", "C0_14", "C0_15",
508 "C0_16", "C0_17", "C0_18", "C0_19", "C0_20", "C0_21", "C0_22", "C0_23",
509 "C0_24", "C0_25", "C0_26", "C0_27", "C0_28", "C0_29", "C0_30", "C0_31",
511 "C2D0", "C2D1", "C2D2", "C2D3", "C2D4", "C2D5", "C2D6", "C2D7",
512 "C2D8", "C2D9", "C2D10", "C2D11", "C2D12", "C2D13", "C2D14", "C2D15",
513 "C2D16", "C2D17", "C2D18", "C2D19", "C2D20", "C2D21", "C2D22", "C2D23",
514 "C2D24", "C2D25", "C2D26", "C2D27", "C2D28", "C2D29", "C2D30", "C2D31",
516 "C2C0", "C2C1", "C2C2", "C2C3", "C2C4", "C2C5", "C2C6", "C2C7",
517 "C2C8", "C2C9", "C2C10", "C2C11", "C2C12", "C2C13", "C2C14", "C2C15",
518 "C2C16", "C2C17", "C2C18", "C2C19", "C2C20", "C2C21", "C2C22", "C2C23",
519 "C2C24", "C2C25", "C2C26", "C2C27", "C2C28", "C2C29", "C2C30", "C2C31",
521 "PC", "code", "cycle", "interrupt",
/* Write position inside the miss_log ring buffer (the declaration of
 * miss_log itself is not visible in this excerpt). */
529 static int miss_log_i;
530 #define miss_log_len (sizeof(miss_log)/sizeof(miss_log[0]))
// used as an AND mask for wrap-around, so miss_log_len has to be a power of two
531 #define miss_log_mask (miss_log_len-1)
/*
 * Records one register mismatch in the ring buffer, overwriting the oldest
 * entry once the buffer is full.
 *
 * reg:        index of the mismatching register word
 * val:        value observed
 * val_expect: value expected
 * pc, cycle:  program counter and cycle count to store with the entry
 */
533 static void miss_log_add(int reg, u32 val, u32 val_expect, u32 pc, u32 cycle)
535 miss_log[miss_log_i].reg = reg;
536 miss_log[miss_log_i].val = val;
537 miss_log[miss_log_i].val_expect = val_expect;
538 miss_log[miss_log_i].pc = pc;
539 miss_log[miss_log_i].cycle = cycle;
// advance and wrap
540 miss_log_i = (miss_log_i + 1) & miss_log_mask;
/*
 * Trace checker: reads records from the file "tracelog" into an expected
 * register set (rregs) and expected memory word (mem_addr/mem_val), compares
 * them with the live psxRegs and memory, and prints diagnostics on mismatch.
 *
 * NOTE(review): many lines of this function are not visible in this excerpt
 * (declarations of f, code, ret, i and bad, the record-reading loop and its
 * dispatch on `code`, the early returns, and whatever terminates the run
 * after the final dump), so the control flow between the visible statements
 * is only partly known.
 */
545 void do_insn_cmp(void)
// expected state, accumulated across calls from the log
547 static psxRegisters rregs;
548 static u32 mem_addr, mem_val;
// both register structs are scanned as flat arrays of 32-bit words
549 u32 *allregs_p = (void *)&psxRegs;
550 u32 *allregs_e = (void *)&rregs;
551 static u32 ppc, failcount;
556 f = fopen("tracelog", "rb");
// one-byte record code, then a 4-byte payload whose destination (memory
// address, memory value or register word `code`) depends on that code
559 if ((ret = fread(&code, 1, 1, f)) <= 0)
566 if ((ret = fread(&mem_addr, 1, 4, f)) <= 0)
571 if ((ret = fread(&mem_val, 1, 4, f)) <= 0)
575 if ((ret = fread(&allregs_e[code], 1, 4, f)) <= 0)
// these fields are taken from the log instead of being compared
584 psxRegs.code = rregs.code; // don't care
585 psxRegs.cycle = rregs.cycle;
586 psxRegs.CP0.r[9] = rregs.CP0.r[9]; // Count
588 //if (psxRegs.cycle == 166172) breakme();
// everything before intCycle matches and the memory word matches
590 if (memcmp(&psxRegs, &rregs, offsetof(psxRegisters, intCycle)) == 0 &&
591 mem_val == memcheck_read(mem_addr)
// mismatch path: log each differing register word
597 for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
598 if (allregs_p[i] != allregs_e[i]) {
599 miss_log_add(i, allregs_p[i], allregs_e[i], psxRegs.pc, psxRegs.cycle);
604 if (mem_val != memcheck_read(mem_addr)) {
605 printf("bad mem @%08x: %08x %08x\n", mem_addr, memcheck_read(mem_addr), mem_val);
// PC still agrees, fewer than 6 differences and fewer than 32 failures so far
// (what happens inside this branch beyond the progress print is not visible)
609 if (psxRegs.pc == rregs.pc && bad < 6 && failcount < 32) {
610 static int last_mcycle;
// progress print whenever cycle >> 20 changes
611 if (last_mcycle != psxRegs.cycle >> 20) {
612 printf("%u\n", psxRegs.cycle);
613 last_mcycle = psxRegs.cycle >> 20;
// report: replay the whole ring buffer starting at the current write position
620 for (i = 0; i < miss_log_len; i++, miss_log_i = (miss_log_i + 1) & miss_log_mask)
621 printf("bad %5s: %08x %08x, pc=%08x, cycle %u\n",
622 regnames[miss_log[miss_log_i].reg], miss_log[miss_log_i].val,
623 miss_log[miss_log_i].val_expect, miss_log[miss_log_i].pc, miss_log[miss_log_i].cycle);
624 printf("-- %d\n", bad);
// first 32 register words, four per output line
625 for (i = 0; i < 8; i++)
626 printf("r%d=%08x r%2d=%08x r%2d=%08x r%2d=%08x\n", i, allregs_p[i],
627 i+8, allregs_p[i+8], i+16, allregs_p[i+16], i+24, allregs_p[i+24]);
628 printf("PC: %08x/%08x, cycle %u\n", psxRegs.pc, ppc, psxRegs.cycle);
629 dump_mem("/mnt/ntz/dev/pnd/tmp/psxram.dump", psxM, 0x200000);
630 dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs.dump", psxH, 0x10000);
633 psxRegs.cycle = rregs.cycle + 2; // sync timing