2 * (C) Gražvydas "notaz" Ignotas, 2010-2011
4 * This work is licensed under the terms of GNU GPL version 2 or later.
5 * See the COPYING file in the top-level directory.
12 #include "../psxhle.h"
13 #include "../r3000a.h"
15 #include "../psxdma.h"
17 #include "../gte_arm.h"
18 #include "../gte_neon.h"
/*
 * Number of elements in the array x.
 * Only meaningful for true arrays; a pointer argument gives a wrong result.
 * The argument is parenthesised before indexing so that any array-typed
 * expression can be passed safely.
 */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
24 //#define evprintf printf
/* Per-page flag array consulted by ari64_clear(): for main-RAM addresses a
 * page whose entry is already non-zero is not passed to invalidate_block(). */
27 char invalid_code[0x100000];
/* 64-byte aligned work buffer; ari64_init() publishes it via scratch_buf_ptr. */
28 static u32 scratch_buf[8*8*2] __attribute__((aligned(64)));
/* One cycle value per PSXINT_* event id; compared against psxRegs.cycle by
 * schedule_timeslice() and irq_test(), rebuilt by new_dyna_restore(). */
29 u32 event_cycles[PSXINT_COUNT];
/*
 * Recompute next_interupt from the currently pending events.
 *
 * Walks the bits of psxRegs.interrupt; for each bit position i it takes the
 * distance dif = event_cycles[i] - psxRegs.cycle and compares it against the
 * running minimum `min`. The result is stored as next_interupt = cycle + min.
 *
 * NOTE(review): this listing elides several lines of the body (the
 * declaration/initial value of min and dif, any per-bit skip test, and the
 * statement guarded by the `if`), so the starting value of min and whether
 * unset bits are skipped cannot be confirmed from here.
 */
31 static void schedule_timeslice(void)
33 u32 i, c = psxRegs.cycle;
34 u32 irqs = psxRegs.interrupt;
/* the loop stops as soon as no higher bits remain set in irqs */
38 for (i = 0; irqs != 0; i++, irqs >>= 1) {
41 dif = event_cycles[i] - c;
42 //evprintf(" ev %d\n", dif);
/* only a strictly positive distance smaller than the current minimum qualifies */
43 if (0 < dif && dif < min)
46 next_interupt = c + min;
/* Handler signature: declared with an empty parameter list, so the argument
 * types of the functions stored below are not checked by the compiler. */
49 typedef void (irq_func)();
/*
 * Event handler table, indexed by PSXINT_* event id (designated initializers;
 * ids without an entry are left NULL).
 * NOTE(review): the call site is not visible in this listing - presumably
 * irq_test() invokes the entry for each event that has become due.
 */
51 static irq_func * const irq_funcs[] = {
52 [PSXINT_SIO] = sioInterrupt,
53 [PSXINT_CDR] = cdrInterrupt,
54 [PSXINT_CDREAD] = cdrReadInterrupt,
55 [PSXINT_GPUDMA] = gpuInterrupt,
56 [PSXINT_MDECOUTDMA] = mdec1Interrupt,
57 [PSXINT_SPUDMA] = spuInterrupt,
58 [PSXINT_MDECINDMA] = mdec0Interrupt,
59 [PSXINT_GPUOTCDMA] = gpuotcInterrupt,
60 [PSXINT_CDRDMA] = cdrDmaInterrupt,
61 [PSXINT_CDRLID] = cdrLidSeekInterrupt,
62 [PSXINT_CDRPLAY] = cdrPlayInterrupt,
63 [PSXINT_RCNT] = psxRcntUpdate,
66 /* local dupe of psxBranchTest, using event_cycles */
/*
 * Takes a snapshot of psxRegs.interrupt and psxRegs.cycle, clears
 * psxRegs.interrupt, then walks the snapshot bit by bit looking for events
 * whose event_cycles[] value has been reached. Afterwards the local mask is
 * OR-ed back and a hardware interrupt exception is raised if one is pending
 * and enabled.
 * NOTE(review): the statements inside the "due" branch (presumably clearing
 * the bit in irqs and calling irq_funcs[irq]) are elided from this listing.
 */
67 static void irq_test(void)
69 u32 irqs = psxRegs.interrupt;
70 u32 cycle = psxRegs.cycle;
73 // irq_funcs() may queue more irqs
74 psxRegs.interrupt = 0;
76 for (irq = 0, irq_bits = irqs; irq_bits != 0; irq++, irq_bits >>= 1) {
/* unsigned subtraction viewed as s32: the event is due once cycle is at or
 * past event_cycles[irq], modulo 2^32 */
79 if ((s32)(cycle - event_cycles[irq]) >= 0) {
/* merge the local mask with whatever was queued while interrupt was zeroed */
84 psxRegs.interrupt |= irqs;
/* any bit set in both hw registers 0x1070 and 0x1074 (presumably interrupt
 * status and mask) with Status bits 0x401 set -> exception 0x400 */
86 if ((psxHu32(0x1070) & psxHu32(0x1074)) && (Status & 0x401) == 0x401) {
87 psxException(0x400, 0);
88 pending_exception = 1;
/*
 * NOTE(review): the lines below appear to belong to a separate function whose
 * header is elided from this listing (original line numbers jump from 88 to
 * 94). What is visible: it traces pc/cycle/next_interupt, calls
 * schedule_timeslice(), and traces again including the remaining distance.
 */
94 evprintf(" +ge %08x, %u->%u\n", psxRegs.pc, psxRegs.cycle, next_interupt);
98 //pending_exception = 1;
100 schedule_timeslice();
102 evprintf(" -ge %08x, %u->%u (%d)\n", psxRegs.pc, psxRegs.cycle,
103 next_interupt, next_interupt - psxRegs.cycle);
/* MTC0 is defined elsewhere; only its prototype is visible here. */
107 extern void MTC0(int reg, u32 val);
/*
 * Write `val` to coprocessor-0 register `reg` on behalf of translated code.
 * After the write, pending_exception is set when Cause & Status & 0x0300 is
 * non-zero.
 * NOTE(review): the call that performs the write (presumably MTC0(reg, val))
 * is elided from this listing.
 */
109 void pcsx_mtc0(u32 reg, u32 val)
111 evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
114 if (Cause & Status & 0x0300) // possible sw irq
115 pending_exception = 1;
/*
 * Variant of pcsx_mtc0(); only the trace line is visible in this listing.
 * NOTE(review): "ds" presumably stands for delay slot, and the rest of the
 * body is elided - confirm how it differs from pcsx_mtc0() in the full file.
 */
118 void pcsx_mtc0_ds(u32 reg, u32 val)
120 evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
/*
 * Pre-save hook: clears the PSXINT_RCNT bit in psxRegs.interrupt so that the
 * saved state stays compatible with old savestates (see inline comment).
 * new_dyna_after_save() sets the bit again.
 */
124 void new_dyna_save(void)
126 psxRegs.interrupt &= ~(1 << PSXINT_RCNT); // old savestate compat
128 // psxRegs.intCycle is always maintained, no need to convert
/* Post-save hook: sets the PSXINT_RCNT bit that new_dyna_save() cleared. */
131 void new_dyna_after_save(void)
133 psxRegs.interrupt |= 1 << PSXINT_RCNT;
/*
 * Post-load hook: rebuilds the local event schedule from the restored
 * psxRegs and then lets the memory layer reload its own state.
 */
136 void new_dyna_restore(void)
/* each event's cycle value = start cycle + duration from psxRegs.intCycle[] */
139 for (i = 0; i < PSXINT_COUNT; i++)
140 event_cycles[i] = psxRegs.intCycle[i].sCycle + psxRegs.intCycle[i].cycle;
/* the root counter event is taken from the counter globals instead, and its
 * bit is always set */
142 event_cycles[PSXINT_RCNT] = psxNextsCounter + psxNextCounter;
143 psxRegs.interrupt |= 1 << PSXINT_RCNT;
/* drop any bits at or above PSXINT_COUNT */
144 psxRegs.interrupt &= (1 << PSXINT_COUNT) - 1;
146 new_dyna_pcsx_mem_load_state();
/* GTE operation handlers, 64 slots; filled at run time by ari64_init() from
 * psxCP2[] (slots without a handler stay NULL). */
150 void *gte_handlers[64];
/*
 * Alternative "_nf" handler set for the same 64 slots; the trailing comment on
 * each row gives the hex index of its first column. ari64_init() may replace
 * individual entries.
 * NOTE(review): "_nf" presumably means "no flags" (handlers that skip
 * computing the GTE flag register) - confirm against the gte sources.
 */
152 void *gte_handlers_nf[64] = {
153 NULL , gteRTPS_nf , NULL , NULL , NULL , NULL , gteNCLIP_nf, NULL , // 00
154 NULL , NULL , NULL , NULL , gteOP_nf , NULL , NULL , NULL , // 08
155 gteDPCS_nf, gteINTPL_nf, gteMVMVA_nf, gteNCDS_nf, gteCDP_nf, NULL , gteNCDT_nf , NULL , // 10
156 NULL , NULL , NULL , gteNCCS_nf, gteCC_nf , NULL , gteNCS_nf , NULL , // 18
157 gteNCT_nf , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20
158 gteSQR_nf , gteDCPL_nf , gteDPCT_nf , NULL , NULL , gteAVSZ3_nf, gteAVSZ4_nf, NULL , // 28
159 gteRTPT_nf, NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30
160 NULL , NULL , NULL , NULL , NULL , gteGPF_nf , gteGPL_nf , gteNCCT_nf, // 38
/*
 * Printable names for the 64 GTE operation slots, laid out like
 * gte_handlers_nf[] (NULL where that table has no handler). Despite the
 * identifier, the entries are operation names, not register names.
 */
163 const char *gte_regnames[64] = {
164 NULL , "RTPS" , NULL , NULL , NULL , NULL , "NCLIP", NULL , // 00
165 NULL , NULL , NULL , NULL , "OP" , NULL , NULL , NULL , // 08
166 "DPCS", "INTPL", "MVMVA", "NCDS", "CDP", NULL , "NCDT" , NULL , // 10
167 NULL , NULL , NULL , "NCCS", "CC" , NULL , "NCS" , NULL , // 18
168 "NCT" , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20
169 "SQR" , "DCPL" , "DPCT" , NULL , NULL , "AVSZ3", "AVSZ4", NULL , // 28
170 "RTPT", NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30
171 NULL , NULL , NULL , NULL , NULL , "GPF" , "GPL" , "NCCT", // 38
174 /* from gte.txt.. not sure if this is any good. */
/* Cycle count per GTE operation slot, 16 slots per row in the same order as
 * gte_handlers_nf[]; slots without a handler hold 0. */
175 const char gte_cycletab[64] = {
176 /* 1 2 3 4 5 6 7 8 9 a b c d e f */
177 0, 15, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 6, 0, 0, 0,
178 8, 8, 8, 19, 13, 0, 44, 0, 0, 0, 0, 17, 11, 0, 14, 0,
179 30, 0, 0, 0, 0, 0, 0, 0, 5, 8, 17, 0, 0, 5, 6, 0,
180 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 39,
/*
 * Helpers that OR together N single-register masks: GCBITSn(...) combines
 * GCBIT() masks, GDBITSn(...) combines GDBIT() masks, each built on the
 * previous arity.
 * NOTE(review): GCBIT and GDBIT themselves are defined on lines elided from
 * this listing; judging by the 64-bit tables that use them, GDBIT presumably
 * selects a bit in the low 32 bits (data registers) and GCBIT one in the high
 * 32 bits (control registers) - confirm.
 */
187 #define GCBITS3(b0,b1,b2) \
188 (GCBIT(b0) | GCBIT(b1) | GCBIT(b2))
189 #define GDBITS2(b0,b1) \
190 (GDBIT(b0) | GDBIT(b1))
191 #define GDBITS3(b0,b1,b2) \
192 (GDBITS2(b0,b1) | GDBIT(b2))
193 #define GDBITS4(b0,b1,b2,b3) \
194 (GDBITS3(b0,b1,b2) | GDBIT(b3))
195 #define GDBITS5(b0,b1,b2,b3,b4) \
196 (GDBITS4(b0,b1,b2,b3) | GDBIT(b4))
197 #define GDBITS6(b0,b1,b2,b3,b4,b5) \
198 (GDBITS5(b0,b1,b2,b3,b4) | GDBIT(b5))
199 #define GDBITS7(b0,b1,b2,b3,b4,b5,b6) \
200 (GDBITS6(b0,b1,b2,b3,b4,b5) | GDBIT(b6))
201 #define GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) \
202 (GDBITS7(b0,b1,b2,b3,b4,b5,b6) | GDBIT(b7))
203 #define GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) \
204 (GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) | GDBIT(b8))
205 #define GDBITS10(b0,b1,b2,b3,b4,b5,b6,b7,b8,b9) \
206 (GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) | GDBIT(b9))
/*
 * 64-bit mask of GTE registers read by each operation, indexed by GTE_* id
 * (ids not listed are zero). Entries combine GCBIT/GDBIT helper masks and, for
 * wide register ranges, literal constants occupying the upper 32 bits.
 * NOTE(review): the bit-to-register mapping depends on the GCBIT/GDBIT
 * definitions, which are not visible in this listing.
 */
208 const uint64_t gte_reg_reads[64] = {
209 [GTE_RTPS] = 0x1f0000ff00000000ll | GDBITS7(0,1,13,14,17,18,19),
210 [GTE_NCLIP] = GDBITS3(12,13,14),
211 [GTE_OP] = GCBITS3(0,2,4) | GDBITS3(9,10,11),
212 [GTE_DPCS] = GCBITS3(21,22,23) | GDBITS4(6,8,21,22),
213 [GTE_INTPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22),
214 [GTE_MVMVA] = 0x00ffffff00000000ll | GDBITS9(0,1,2,3,4,5,9,10,11), // XXX: maybe decode further?
215 [GTE_NCDS] = 0x00ffff0000000000ll | GDBITS6(0,1,6,8,21,22),
216 [GTE_CDP] = 0x00ffe00000000000ll | GDBITS7(6,8,9,10,11,21,22),
217 [GTE_NCDT] = 0x00ffff0000000000ll | GDBITS8(0,1,2,3,4,5,6,8),
218 [GTE_NCCS] = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22),
219 [GTE_CC] = 0x001fe00000000000ll | GDBITS6(6,9,10,11,21,22),
220 [GTE_NCS] = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22),
221 [GTE_NCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
222 [GTE_SQR] = GDBITS3(9,10,11),
223 [GTE_DCPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22),
224 [GTE_DPCT] = GCBITS3(21,22,23) | GDBITS4(8,20,21,22),
225 [GTE_AVSZ3] = GCBIT(29) | GDBITS3(17,18,19),
226 [GTE_AVSZ4] = GCBIT(30) | GDBITS4(16,17,18,19),
227 [GTE_RTPT] = 0x1f0000ff00000000ll | GDBITS7(0,1,2,3,4,5,19),
228 [GTE_GPF] = GDBITS7(6,8,9,10,11,21,22),
229 [GTE_GPL] = GDBITS10(6,8,9,10,11,21,22,25,26,27),
230 [GTE_NCCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
233 // note: this excludes gteFLAG that is always written to
/*
 * 64-bit mask of GTE registers written by each operation, indexed by GTE_* id
 * (ids not listed are zero). All entries here use only GDBIT masks or
 * constants that fit in the low 32 bits.
 * NOTE(review): the bit-to-register mapping depends on the GDBIT definition,
 * which is not visible in this listing.
 */
234 const uint64_t gte_reg_writes[64] = {
235 [GTE_RTPS] = 0x0f0f7f00ll,
236 [GTE_NCLIP] = GDBIT(24),
237 [GTE_OP] = GDBITS6(9,10,11,25,26,27),
238 [GTE_DPCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
239 [GTE_INTPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
240 [GTE_MVMVA] = GDBITS6(9,10,11,25,26,27),
241 [GTE_NCDS] = GDBITS9(9,10,11,20,21,22,25,26,27),
242 [GTE_CDP] = GDBITS9(9,10,11,20,21,22,25,26,27),
243 [GTE_NCDT] = GDBITS9(9,10,11,20,21,22,25,26,27),
244 [GTE_NCCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
245 [GTE_CC] = GDBITS9(9,10,11,20,21,22,25,26,27),
246 [GTE_NCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
247 [GTE_NCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
248 [GTE_SQR] = GDBITS6(9,10,11,25,26,27),
249 [GTE_DCPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
250 [GTE_DPCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
251 [GTE_AVSZ3] = GDBITS2(7,24),
252 [GTE_AVSZ4] = GDBITS2(7,24),
253 [GTE_RTPT] = 0x0f0f7f00ll,
254 [GTE_GPF] = GDBITS9(9,10,11,20,21,22,25,26,27),
255 [GTE_GPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
256 [GTE_NCCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
/*
 * Initialise the dynarec glue: memory layer, GTE handler tables and the
 * pointers exported to the recompiler.
 * NOTE(review): several lines (local declarations, the dynarec init call,
 * nested preprocessor conditionals and the return statement) are elided from
 * this listing, so which of the handler overrides below are mutually exclusive
 * cannot be determined from here.
 */
259 static int ari64_init()
261 extern void (*psxCP2[64])();
262 extern void psxNULL();
266 new_dyna_pcsx_mem_init();
/* take over every interpreter GTE handler that is not the psxNULL placeholder;
 * the remaining slots keep their zero-initialised NULL */
268 for (i = 0; i < ARRAY_SIZE(gte_handlers); i++)
269 if (psxCP2[i] != psxNULL)
270 gte_handlers[i] = psxCP2[i];
/* ARM builds without DRC_DBG swap in hand-written handlers */
272 #if defined(__arm__) && !defined(DRC_DBG)
273 gte_handlers[0x06] = gteNCLIP_arm;
275 gte_handlers_nf[0x01] = gteRTPS_nf_arm;
276 gte_handlers_nf[0x30] = gteRTPT_nf_arm;
279 // compiler's _nf version is still a lot slower than neon
280 // _nf_arm RTPS is roughly the same, RTPT slower
281 gte_handlers[0x01] = gte_handlers_nf[0x01] = gteRTPS_neon;
282 gte_handlers[0x30] = gte_handlers_nf[0x30] = gteRTPT_neon;
/* replaces the whole _nf table with the regular handlers.
 * NOTE(review): the preprocessor conditional that presumably guards this line
 * is elided from this listing. */
286 memcpy(gte_handlers_nf, gte_handlers, sizeof(gte_handlers_nf));
/* publish buffers through the global pointers */
289 zeromem_ptr = zero_mem;
290 scratch_buf_ptr = scratch_buf;
/*
 * Reset hook: logs the call, resets the memory layer, invalidates all pages
 * via invalidate_all_pages() and sets pending_exception.
 */
295 static void ari64_reset()
297 printf("ari64_reset\n");
298 new_dyna_pcsx_mem_reset();
299 invalidate_all_pages();
301 pending_exception = 1;
304 // execute until predefined leave points
305 // (HLE softcall exit and BIOS fastboot end)
/*
 * Refreshes next_interupt with schedule_timeslice() and traces pc / cycle /
 * next_interupt before and after the run.
 * NOTE(review): the call that actually enters translated code sits between
 * the two traces and is elided from this listing.
 */
306 static void ari64_execute_until()
308 schedule_timeslice();
310 evprintf("ari64_execute %08x, %u->%u (%d)\n", psxRegs.pc,
311 psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle);
315 evprintf("ari64_execute end %08x, %u->%u (%d)\n", psxRegs.pc,
316 psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle);
/*
 * Execution entry point built on ari64_execute_until(); traces the pc each
 * time the dynarec returns.
 * NOTE(review): the enclosing loop and its exit condition (if any) are elided
 * from this listing.
 */
319 static void ari64_execute()
322 ari64_execute_until();
323 evprintf("drc left @%08x\n", psxRegs.pc);
/*
 * Invalidate translated code for a memory range.
 *
 * addr: start address of the range.
 * size: length in 32-bit units (multiplied by 4 below to get bytes).
 *
 * Page numbers are addresses shifted right by 12, and the loop covers
 * start..end inclusive. For addresses in the 0x80000000 main-RAM window, pages
 * whose invalid_code[] entry is already set are skipped.
 * NOTE(review): the assignment of `start` is elided from this listing
 * (presumably addr >> 12).
 */
327 static void ari64_clear(u32 addr, u32 size)
329 u32 start, end, main_ram;
331 size *= 4; /* PCSX uses DMA units */
333 evprintf("ari64_clear %08x %04x\n", addr, size);
335 /* check for RAM mirrors */
336 main_ram = (addr & 0xffe00000) == 0x80000000;
339 end = (addr + size) >> 12;
341 for (; start <= end; start++)
342 if (!main_ram || !invalid_code[start])
343 invalidate_block(start);
/* Shutdown hook: tears down the dynarec first, then the memory layer. */
346 static void ari64_shutdown()
348 new_dynarec_cleanup();
349 new_dyna_pcsx_mem_shutdown();
/* Interpreter entry points defined elsewhere; the "T" names are the variants
 * the code after this point refers to. */
352 extern void intExecute();
353 extern void intExecuteT();
354 extern void intExecuteBlock();
355 extern void intExecuteBlockT();
/* Alias the "T" variants to the plain interpreter functions.
 * NOTE(review): the preprocessor conditional that presumably selects these
 * aliases is elided from this listing. */
357 #define intExecuteT intExecute
358 #define intExecuteBlockT intExecuteBlock
/*
 * Empty stand-ins for the trace hooks and for the dynarec / memory-layer
 * entry points, together with definitions of the globals the rest of this file
 * references.
 * NOTE(review): the preprocessor conditionals around this section are elided
 * from this listing; presumably it is compiled only when the real dynarec is
 * not built (do_insn_trace / do_insn_cmp are defined again with real bodies
 * further down).
 */
377 void do_insn_trace() {}
378 void do_insn_cmp() {}
382 unsigned int address;
383 int pending_exception, stop;
384 unsigned int next_interupt;
385 int new_dynarec_did_compile;
386 int cycle_multiplier;
387 int new_dynarec_hacks;
391 void *scratch_buf_ptr;
/* the (void) expression merely references ari64_execute - presumably to keep
 * the compiler from flagging the static function as unused */
392 void new_dynarec_init() { (void)ari64_execute; }
393 void new_dyna_start() {}
394 void new_dynarec_cleanup() {}
395 void new_dynarec_clear_full() {}
396 void invalidate_all_pages() {}
397 void invalidate_block(unsigned int block) {}
398 void new_dyna_pcsx_mem_init(void) {}
399 void new_dyna_pcsx_mem_reset(void) {}
400 void new_dyna_pcsx_mem_load_state(void) {}
401 void new_dyna_pcsx_mem_shutdown(void) {}
/* Defined elsewhere; do_insn_trace() records changes to this address and to
 * the word memcheck_read() returns for it. */
408 extern u32 last_io_addr;
/*
 * Write `size` bytes starting at `mem` to the file named `fname`.
 *
 * If `fname` cannot be opened for writing and it contains a '/', the dump is
 * retried under the bare file name (the part after the last '/'), i.e. in the
 * current directory. When neither location can be opened the function reports
 * the failure and returns without writing, instead of handing a NULL stream to
 * fwrite(). A short write is reported as well. The stream is always closed.
 */
static void dump_mem(const char *fname, void *mem, size_t size)
{
	FILE *f1 = fopen(fname, "wb");

	if (f1 == NULL) {
		/* fall back to the bare file name, but only if there is a
		 * directory part to strip (strrchr returns NULL otherwise) */
		const char *slash = strrchr(fname, '/');

		if (slash != NULL)
			f1 = fopen(slash + 1, "wb");
	}
	if (f1 == NULL) {
		printf("dump_mem: can't open %s for writing\n", fname);
		return;
	}

	if (fwrite(mem, 1, size, f1) != size)
		printf("dump_mem: short write to %s\n", fname);
	fclose(f1);
}
/*
 * Return the 32-bit word backing emulated address `a`.
 *
 * The upper 16 address bits choose the backing buffer: 0x1f80 -> psxH,
 * 0x1f00 -> psxP, anything else -> psxM. The offset is masked to a multiple
 * of 4 inside the buffer (0xfffc for psxH/psxP, 0x1ffffc for psxM), so the low
 * two address bits are ignored and every other address wraps into psxM.
 */
419 static u32 memcheck_read(u32 a)
421 if ((a >> 16) == 0x1f80)
423 return *(u32 *)(psxH + (a & 0xfffc));
425 if ((a >> 16) == 0x1f00)
427 return *(u32 *)(psxP + (a & 0xfffc));
429 // if ((a & ~0xe0600000) < 0x200000)
431 return *(u32 *)(psxM + (a & 0x1ffffc));
/*
 * Append one trace record for the current CPU state to the file "tracelog".
 *
 * Only changes since the previous call are written: each 32-bit word of
 * psxRegs located before the intCycle member, each event_cycles[] entry,
 * last_io_addr, and the word memcheck_read() returns for last_io_addr. The
 * previous values live in function-local statics.
 *
 * NOTE(review): the lines that set the tag `byte` written ahead of each value,
 * and the guard around the fopen() call, are elided from this listing; the
 * exact record layout has to be confirmed against the full file
 * (do_insn_cmp() reads the same file back).
 */
434 void do_insn_trace(void)
436 static psxRegisters oldregs;
437 static u32 old_io_addr = (u32)-1;
438 static u32 old_io_data = 0xbad0c0de;
439 static u32 event_cycles_o[PSXINT_COUNT];
/* view both register structs as flat arrays of 32-bit words */
440 u32 *allregs_p = (void *)&psxRegs;
441 u32 *allregs_o = (void *)&oldregs;
446 //last_io_addr = 0x5e2c8;
448 f = fopen("tracelog", "wb");
/* copying `code` first keeps it from ever showing up as a change */
451 oldregs.code = psxRegs.code; // don't care
452 for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
453 if (allregs_p[i] != allregs_o[i]) {
455 fwrite(&allregs_p[i], 1, 4, f);
456 allregs_o[i] = allregs_p[i];
460 for (i = 0; i < PSXINT_COUNT; i++) {
461 if (event_cycles[i] != event_cycles_o[i]) {
463 fwrite(&byte, 1, 1, f);
465 fwrite(&event_cycles[i], 1, 4, f);
466 event_cycles_o[i] = event_cycles[i];
470 if (old_io_addr != last_io_addr) {
472 fwrite(&byte, 1, 1, f);
473 fwrite(&last_io_addr, 1, 4, f);
474 old_io_addr = last_io_addr;
476 io_data = memcheck_read(last_io_addr);
477 if (old_io_data != io_data) {
479 fwrite(&byte, 1, 1, f);
480 fwrite(&io_data, 1, 4, f);
481 old_io_data = io_data;
484 fwrite(&byte, 1, 1, f);
/* hard-coded debugging hook: dump 0x200000 bytes of psxM and 0x10000 bytes of
 * psxH when the cycle counter equals 190230 */
487 if (psxRegs.cycle == 190230) {
488 dump_mem("/mnt/ntz/dev/pnd/tmp/psxram_i.dump", psxM, 0x200000);
489 dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs_i.dump", psxH, 0x10000);
/*
 * Display names for the 32-bit words of psxRegisters that precede intCycle,
 * in memory order; do_insn_cmp() indexes this table with the word index kept
 * in miss_log[].reg when printing mismatches.
 * NOTE(review): some initializer rows are elided from this listing (e.g.
 * between "r31" and "C0_0"), so the visible names do not account for every
 * slot.
 */
496 static const char *regnames[offsetof(psxRegisters, intCycle) / 4] = {
497 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
498 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
499 "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
500 "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
502 "C0_0", "C0_1", "C0_2", "C0_3", "C0_4", "C0_5", "C0_6", "C0_7",
503 "C0_8", "C0_9", "C0_10", "C0_11", "C0_12", "C0_13", "C0_14", "C0_15",
504 "C0_16", "C0_17", "C0_18", "C0_19", "C0_20", "C0_21", "C0_22", "C0_23",
505 "C0_24", "C0_25", "C0_26", "C0_27", "C0_28", "C0_29", "C0_30", "C0_31",
507 "C2D0", "C2D1", "C2D2", "C2D3", "C2D4", "C2D5", "C2D6", "C2D7",
508 "C2D8", "C2D9", "C2D10", "C2D11", "C2D12", "C2D13", "C2D14", "C2D15",
509 "C2D16", "C2D17", "C2D18", "C2D19", "C2D20", "C2D21", "C2D22", "C2D23",
510 "C2D24", "C2D25", "C2D26", "C2D27", "C2D28", "C2D29", "C2D30", "C2D31",
512 "C2C0", "C2C1", "C2C2", "C2C3", "C2C4", "C2C5", "C2C6", "C2C7",
513 "C2C8", "C2C9", "C2C10", "C2C11", "C2C12", "C2C13", "C2C14", "C2C15",
514 "C2C16", "C2C17", "C2C18", "C2C19", "C2C20", "C2C21", "C2C22", "C2C23",
515 "C2C24", "C2C25", "C2C26", "C2C27", "C2C28", "C2C29", "C2C30", "C2C31",
517 "PC", "code", "cycle", "interrupt",
/*
 * Ring buffer of recent register mismatches; miss_log_i is the next slot to
 * write.
 * NOTE(review): the declaration of miss_log[] itself is elided from this
 * listing.
 */
525 static int miss_log_i;
526 #define miss_log_len (sizeof(miss_log)/sizeof(miss_log[0]))
/* masking with len-1 only wraps correctly when miss_log_len is a power of two */
527 #define miss_log_mask (miss_log_len-1)
/*
 * Record one mismatch (register word index, observed value, expected value,
 * pc and cycle) in the current slot, then advance the write index with
 * wrap-around, overwriting the oldest entry once the buffer is full.
 */
529 static void miss_log_add(int reg, u32 val, u32 val_expect, u32 pc, u32 cycle)
531 miss_log[miss_log_i].reg = reg;
532 miss_log[miss_log_i].val = val;
533 miss_log[miss_log_i].val_expect = val_expect;
534 miss_log[miss_log_i].pc = pc;
535 miss_log[miss_log_i].cycle = cycle;
536 miss_log_i = (miss_log_i + 1) & miss_log_mask;
/*
 * Compare the current CPU state against the next record of "tracelog".
 *
 * Reads tagged updates from the file into the expected-register copy `rregs`
 * (plus mem_addr / mem_val and an optional event id with its cycle value),
 * then checks psxRegs, the memory word at mem_addr and event_cycles[] against
 * them. Register mismatches are queued with miss_log_add(); memory and event
 * mismatches are printed directly. On failure the mismatch ring buffer, the 32
 * general registers and PC/cycle are printed and RAM / psxH are dumped.
 *
 * NOTE(review): large parts of the body are elided from this listing (the
 * record-reading loop and its tag dispatch, the early-return paths, updates of
 * `bad`, `failcount` and `ppc`, and whatever terminates the run after the
 * dumps), so the control flow between the visible statements has to be
 * confirmed against the full file.
 */
541 void do_insn_cmp(void)
543 static psxRegisters rregs;
544 static u32 mem_addr, mem_val;
/* view the live and the expected register structs as flat 32-bit word arrays */
545 u32 *allregs_p = (void *)&psxRegs;
546 u32 *allregs_e = (void *)&rregs;
547 static u32 ppc, failcount;
548 int i, ret, bad = 0, which_event = -1;
553 f = fopen("tracelog", "rb");
556 if ((ret = fread(&code, 1, 1, f)) <= 0)
/* only one byte is read into the int which_event */
565 fread(&which_event, 1, 1, f);
566 fread(&ev_cycles, 1, 4, f);
569 fread(&mem_addr, 1, 4, f);
572 fread(&mem_val, 1, 4, f);
/* here `code` is used directly as the word index of the register to update */
575 fread(&allregs_e[code], 1, 4, f);
/* these fields are copied from the expected state, so they never count as
 * mismatches */
583 psxRegs.code = rregs.code; // don't care
584 psxRegs.cycle = rregs.cycle;
585 psxRegs.CP0.r[9] = rregs.CP0.r[9]; // Count
587 //if (psxRegs.cycle == 166172) breakme();
/* fast path: all register words before intCycle and the watched memory word
 * match */
589 if (memcmp(&psxRegs, &rregs, offsetof(psxRegisters, intCycle)) == 0 &&
590 mem_val == memcheck_read(mem_addr)
596 for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
597 if (allregs_p[i] != allregs_e[i]) {
598 miss_log_add(i, allregs_p[i], allregs_e[i], psxRegs.pc, psxRegs.cycle);
603 if (mem_val != memcheck_read(mem_addr)) {
604 printf("bad mem @%08x: %08x %08x\n", mem_addr, memcheck_read(mem_addr), mem_val);
608 if (which_event >= 0 && event_cycles[which_event] != ev_cycles) {
609 printf("bad ev_cycles #%d: %08x %08x\n", which_event, event_cycles[which_event], ev_cycles);
/* branch taken while the PC still matches and the mismatch counts stay under
 * the limits; it prints the cycle counter when its value >> 20 changes */
613 if (psxRegs.pc == rregs.pc && bad < 6 && failcount < 32) {
614 static int last_mcycle;
615 if (last_mcycle != psxRegs.cycle >> 20) {
616 printf("%u\n", psxRegs.cycle);
617 last_mcycle = psxRegs.cycle >> 20;
/* print the whole ring buffer, starting at miss_log_i (the oldest slot once
 * the buffer has wrapped) */
624 for (i = 0; i < miss_log_len; i++, miss_log_i = (miss_log_i + 1) & miss_log_mask)
625 printf("bad %5s: %08x %08x, pc=%08x, cycle %u\n",
626 regnames[miss_log[miss_log_i].reg], miss_log[miss_log_i].val,
627 miss_log[miss_log_i].val_expect, miss_log[miss_log_i].pc, miss_log[miss_log_i].cycle);
628 printf("-- %d\n", bad);
/* the first 32 register words, four per line */
629 for (i = 0; i < 8; i++)
630 printf("r%d=%08x r%2d=%08x r%2d=%08x r%2d=%08x\n", i, allregs_p[i],
631 i+8, allregs_p[i+8], i+16, allregs_p[i+16], i+24, allregs_p[i+24]);
632 printf("PC: %08x/%08x, cycle %u\n", psxRegs.pc, ppc, psxRegs.cycle);
633 dump_mem("/mnt/ntz/dev/pnd/tmp/psxram.dump", psxM, 0x200000);
634 dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs.dump", psxH, 0x10000);
637 psxRegs.cycle = rregs.cycle + 2; // sync timing