2 * (C) Gražvydas "notaz" Ignotas, 2010-2011
4 * This work is licensed under the terms of GNU GPL version 2 or later.
5 * See the COPYING file in the top-level directory.
12 #include "../psxhle.h"
13 #include "../r3000a.h"
15 #include "../psxdma.h"
17 #include "../gte_arm.h"
18 #include "../gte_neon.h"
/*
 * Number of elements in a true array (must not be used on a pointer).
 * The argument is parenthesized before it is indexed so that expression
 * arguments are grouped as the caller wrote them.
 */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
24 //#define evprintf printf
/* Per-page flag bytes (0x100000 entries); ari64_clear() consults them with
 * an index derived from address >> 12 before invalidating a page. */
27 char invalid_code[0x100000];
/* One cycle value per PSXINT_* event id; compared against psxRegs.cycle
 * by schedule_timeslice() and irq_test(). */
28 u32 event_cycles[PSXINT_COUNT];
/*
 * Recompute next_interupt from the currently pending events.
 *
 * Walks the bit positions of psxRegs.interrupt (up to the highest set bit),
 * measures how far each event_cycles[] entry is from the current cycle and
 * stores current cycle + the smallest positive distance in next_interupt.
 * NOTE(review): presumably only positions whose bit is set are considered
 * and min starts from a default slice length - confirm.
 */
30 static void schedule_timeslice(void)
32 u32 i, c = psxRegs.cycle;
33 u32 irqs = psxRegs.interrupt;
// the mask is shifted right each pass; the loop ends once no set bits remain
37 for (i = 0; irqs != 0; i++, irqs >>= 1) {
// distance from the current cycle to event i
40 dif = event_cycles[i] - c;
41 //evprintf(" ev %d\n", dif);
// only a positive distance smaller than the current minimum qualifies
42 if (0 < dif && dif < min)
45 next_interupt = c + min;
/* Handler type for scheduled events; declared without a parameter list. */
48 typedef void (irq_func)();
/*
 * Event handler table indexed by PSXINT_* id (designated initializers, so
 * the order of the entries below does not matter).
 */
50 static irq_func * const irq_funcs[] = {
51 [PSXINT_SIO] = sioInterrupt,
52 [PSXINT_CDR] = cdrInterrupt,
53 [PSXINT_CDREAD] = cdrReadInterrupt,
54 [PSXINT_GPUDMA] = gpuInterrupt,
55 [PSXINT_MDECOUTDMA] = mdec1Interrupt,
56 [PSXINT_SPUDMA] = spuInterrupt,
57 [PSXINT_MDECINDMA] = mdec0Interrupt,
58 [PSXINT_GPUOTCDMA] = gpuotcInterrupt,
59 [PSXINT_CDRDMA] = cdrDmaInterrupt,
60 [PSXINT_CDRLID] = cdrLidSeekInterrupt,
61 [PSXINT_CDRPLAY] = cdrPlayInterrupt,
62 [PSXINT_RCNT] = psxRcntUpdate,
65 /* local dupe of psxBranchTest, using event_cycles */
/*
 * Works on a snapshot of the pending-event mask: psxRegs.interrupt is
 * cleared first, every bit position of the snapshot is checked against
 * event_cycles[], and the snapshot is OR-ed back into psxRegs.interrupt
 * afterwards.  Finally raises exception 0x400 and sets pending_exception
 * when a hardware interrupt is both flagged and enabled.
 * NOTE(review): the loop body presumably calls irq_funcs[irq] and removes
 * the handled bit from irqs - confirm.
 */
66 static void irq_test(void)
68 u32 irqs = psxRegs.interrupt;
69 u32 cycle = psxRegs.cycle;
72 // irq_funcs() may queue more irqs
73 psxRegs.interrupt = 0;
75 for (irq = 0, irq_bits = irqs; irq_bits != 0; irq++, irq_bits >>= 1) {
// due when the event's cycle is not ahead of the current cycle; the signed
// difference keeps the comparison valid across u32 wrap-around
78 if ((s32)(cycle - event_cycles[irq]) >= 0) {
// merge what is left of the snapshot with anything queued in the meantime
83 psxRegs.interrupt |= irqs;
// any bit set in both words at 0x1070 and 0x1074, and Status bits 0 and 10
// (mask 0x401) both set
85 if ((psxHu32(0x1070) & psxHu32(0x1074)) && (Status & 0x401) == 0x401) {
86 psxException(0x400, 0);
87 pending_exception = 1;
93 evprintf(" +ge %08x, %u->%u\n", psxRegs.pc, psxRegs.cycle, next_interupt);
97 //pending_exception = 1;
101 evprintf(" -ge %08x, %u->%u (%d)\n", psxRegs.pc, psxRegs.cycle,
102 next_interupt, next_interupt - psxRegs.cycle);
106 extern void MTC0(int reg, u32 val);
/*
 * MTC0 entry point taking a register number and a value; traces reg, val,
 * pc and cycle through evprintf.
 * NOTE(review): presumably forwards to MTC0() declared above - confirm.
 */
108 void pcsx_mtc0(u32 reg, u32 val)
110 evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
/*
 * Variant of pcsx_mtc0() with the same signature and the same trace output.
 * NOTE(review): "_ds" presumably stands for delay slot - confirm.
 */
115 void pcsx_mtc0_ds(u32 reg, u32 val)
117 evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
/*
 * Save-side hook: clears the PSXINT_RCNT bit in psxRegs.interrupt
 * (new_dyna_after_save() sets it again).
 */
121 void new_dyna_save(void)
123 psxRegs.interrupt &= ~(1 << PSXINT_RCNT); // old savestate compat
125 // psxRegs.intCycle is always maintained, no need to convert
/* Counterpart of new_dyna_save(): sets the PSXINT_RCNT bit in
 * psxRegs.interrupt again. */
128 void new_dyna_after_save(void)
130 psxRegs.interrupt |= 1 << PSXINT_RCNT;
/*
 * Restore-side hook: rebuilds event_cycles[] from psxRegs.intCycle[],
 * re-arms the root counter event and notifies the memory layer.
 */
133 void new_dyna_restore(void)
// each event's cycle is its start cycle plus its duration
136 for (i = 0; i < PSXINT_COUNT; i++)
137 event_cycles[i] = psxRegs.intCycle[i].sCycle + psxRegs.intCycle[i].cycle;
// the root counter slot is derived from the counter globals instead
139 event_cycles[PSXINT_RCNT] = psxNextsCounter + psxNextCounter;
140 psxRegs.interrupt |= 1 << PSXINT_RCNT;
// keep only the low PSXINT_COUNT bits of the mask
141 psxRegs.interrupt &= (1 << PSXINT_COUNT) - 1;
143 new_dyna_pcsx_mem_load_state();
/* GTE handler table, 64 slots; filled from psxCP2[] in ari64_init(). */
147 void *gte_handlers[64];
/*
 * Second GTE handler table holding the "_nf" variants, laid out eight
 * entries per row; the trailing comment on each row is the index of its
 * first entry.  Slots without a handler are NULL.
 * NOTE(review): "_nf" presumably means "no flags" - confirm.
 */
149 void *gte_handlers_nf[64] = {
150 NULL , gteRTPS_nf , NULL , NULL , NULL , NULL , gteNCLIP_nf, NULL , // 00
151 NULL , NULL , NULL , NULL , gteOP_nf , NULL , NULL , NULL , // 08
152 gteDPCS_nf, gteINTPL_nf, gteMVMVA_nf, gteNCDS_nf, gteCDP_nf, NULL , gteNCDT_nf , NULL , // 10
153 NULL , NULL , NULL , gteNCCS_nf, gteCC_nf , NULL , gteNCS_nf , NULL , // 18
154 gteNCT_nf , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20
155 gteSQR_nf , gteDCPL_nf , gteDPCT_nf , NULL , NULL , gteAVSZ3_nf, gteAVSZ4_nf, NULL , // 28
156 gteRTPT_nf, NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30
157 NULL , NULL , NULL , NULL , NULL , gteGPF_nf , gteGPL_nf , gteNCCT_nf, // 38
/*
 * Printable names laid out in the same 64-slot arrangement as
 * gte_handlers_nf; despite the identifier, the strings name GTE operations.
 * Unused slots are NULL.
 */
160 const char *gte_regnames[64] = {
161 NULL , "RTPS" , NULL , NULL , NULL , NULL , "NCLIP", NULL , // 00
162 NULL , NULL , NULL , NULL , "OP" , NULL , NULL , NULL , // 08
163 "DPCS", "INTPL", "MVMVA", "NCDS", "CDP", NULL , "NCDT" , NULL , // 10
164 NULL , NULL , NULL , "NCCS", "CC" , NULL , "NCS" , NULL , // 18
165 "NCT" , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20
166 "SQR" , "DCPL" , "DPCT" , NULL , NULL , "AVSZ3", "AVSZ4", NULL , // 28
167 "RTPT", NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30
168 NULL , NULL , NULL , NULL , NULL , "GPF" , "GPL" , "NCCT", // 38
171 /* from gte.txt.. not sure if this is any good. */
/* 64 entries, sixteen per row; non-zero values sit at the same indices as
 * the named operations in gte_regnames.
 * NOTE(review): presumably cycle counts per GTE operation - confirm. */
172 const char gte_cycletab[64] = {
173 /* 1 2 3 4 5 6 7 8 9 a b c d e f */
174 0, 15, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 6, 0, 0, 0,
175 8, 8, 8, 19, 13, 0, 44, 0, 0, 0, 0, 17, 11, 0, 14, 0,
176 30, 0, 0, 0, 0, 0, 0, 0, 5, 8, 17, 0, 0, 5, 6, 0,
177 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 39,
/*
 * Helpers that OR together several single-register masks.
 * GCBITS3 combines three GCBIT() masks; GDBITSn combines n GDBIT() masks,
 * each one defined as the next smaller macro plus one more GDBIT().
 */
184 #define GCBITS3(b0,b1,b2) \
185 (GCBIT(b0) | GCBIT(b1) | GCBIT(b2))
186 #define GDBITS2(b0,b1) \
187 (GDBIT(b0) | GDBIT(b1))
188 #define GDBITS3(b0,b1,b2) \
189 (GDBITS2(b0,b1) | GDBIT(b2))
190 #define GDBITS4(b0,b1,b2,b3) \
191 (GDBITS3(b0,b1,b2) | GDBIT(b3))
192 #define GDBITS5(b0,b1,b2,b3,b4) \
193 (GDBITS4(b0,b1,b2,b3) | GDBIT(b4))
194 #define GDBITS6(b0,b1,b2,b3,b4,b5) \
195 (GDBITS5(b0,b1,b2,b3,b4) | GDBIT(b5))
196 #define GDBITS7(b0,b1,b2,b3,b4,b5,b6) \
197 (GDBITS6(b0,b1,b2,b3,b4,b5) | GDBIT(b6))
198 #define GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) \
199 (GDBITS7(b0,b1,b2,b3,b4,b5,b6) | GDBIT(b7))
200 #define GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) \
201 (GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) | GDBIT(b8))
202 #define GDBITS10(b0,b1,b2,b3,b4,b5,b6,b7,b8,b9) \
203 (GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) | GDBIT(b9))
/*
 * 64-bit register mask per GTE operation, indexed by the GTE_* operation
 * id; operations not listed are zero.
 * NOTE(review): presumably the set of GTE registers each operation reads,
 * with GDBIT() selecting data registers and GCBIT() / the literal high-word
 * constants selecting control registers - confirm against the GCBIT/GDBIT
 * definitions.
 */
205 const uint64_t gte_reg_reads[64] = {
206 [GTE_RTPS] = 0x1f0000ff00000000ll | GDBITS7(0,1,13,14,17,18,19),
207 [GTE_NCLIP] = GDBITS3(12,13,14),
208 [GTE_OP] = GCBITS3(0,2,4) | GDBITS3(9,10,11),
209 [GTE_DPCS] = GCBITS3(21,22,23) | GDBITS4(6,8,21,22),
210 [GTE_INTPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22),
211 [GTE_MVMVA] = 0x00ffffff00000000ll | GDBITS6(0,1,2,3,4,5), // XXX: maybe decode further?
212 [GTE_NCDS] = 0x00ffff0000000000ll | GDBITS5(0,1,6,21,22),
213 [GTE_CDP] = 0x00fff00000000000ll | GDBITS7(6,8,9,10,11,21,22),
214 [GTE_NCDT] = 0x00ffff0000000000ll | GDBITS8(0,1,2,3,4,5,6,8),
215 [GTE_NCCS] = 0x00ffff0000000000ll | GDBITS6(0,1,6,8,21,22),
216 [GTE_CC] = 0x001fe00000000000ll | GDBITS6(6,9,10,11,21,22),
217 [GTE_NCS] = 0x001fff0000000000ll | GDBITS4(0,1,21,22),
218 [GTE_NCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
219 [GTE_SQR] = GDBITS3(9,10,11),
220 [GTE_DCPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22),
221 [GTE_DPCT] = GCBITS3(21,22,23) | GDBITS4(8,20,21,22),
222 [GTE_AVSZ3] = GCBIT(29) | GDBITS3(17,18,19),
223 [GTE_AVSZ4] = GCBIT(30) | GDBITS4(16,17,18,19),
224 [GTE_RTPT] = 0x1f0000ff00000000ll | GDBITS7(0,1,2,3,4,5,19),
225 [GTE_GPF] = GDBITS7(6,8,9,10,11,21,22),
226 [GTE_GPL] = GDBITS10(6,8,9,10,11,21,22,25,26,27),
227 [GTE_NCCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
/*
 * 64-bit register mask per GTE operation, indexed by the GTE_* operation
 * id, in the same layout as gte_reg_reads.
 * NOTE(review): presumably the set of registers each operation writes -
 * confirm against the GCBIT/GDBIT definitions.
 */
230 // note: this excludes gteFLAG that is always written to
231 const uint64_t gte_reg_writes[64] = {
232 [GTE_RTPS] = 0x0f0f7f00ll,
233 [GTE_NCLIP] = GDBIT(24),
234 [GTE_OP] = GDBITS6(9,10,11,25,26,27),
235 [GTE_DPCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
236 [GTE_INTPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
237 [GTE_MVMVA] = GDBITS6(9,10,11,25,26,27),
238 [GTE_NCDS] = GDBITS9(9,10,11,20,21,22,25,26,27),
239 [GTE_CDP] = GDBITS9(9,10,11,20,21,22,25,26,27),
240 [GTE_NCDT] = GDBITS9(9,10,11,20,21,22,25,26,27),
241 [GTE_NCCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
242 [GTE_CC] = GDBITS9(9,10,11,20,21,22,25,26,27),
243 [GTE_NCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
244 [GTE_NCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
245 [GTE_SQR] = GDBITS6(9,10,11,25,26,27),
246 [GTE_DCPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
247 [GTE_DPCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
248 [GTE_AVSZ3] = GDBITS2(7,24),
249 [GTE_AVSZ4] = GDBITS2(7,24),
250 [GTE_RTPT] = 0x0f0f7f00ll,
251 [GTE_GPF] = GDBITS9(9,10,11,20,21,22,25,26,27),
252 [GTE_GPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
253 [GTE_NCCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
/*
 * Initialisation hook: sets up the memory layer, fills gte_handlers[] from
 * the psxCP2[] table and, unless DRC_DBG is defined, installs the
 * hand-written ARM / NEON GTE routines.
 * NOTE(review): the preprocessor structure around the ARM, NEON and memcpy
 * statements is only partly visible; the memcpy presumably belongs to the
 * DRC_DBG path - confirm.
 */
256 static int ari64_init()
258 extern void (*psxCP2[64])();
259 extern void psxNULL();
263 new_dyna_pcsx_mem_init();
// copy every psxCP2 slot that is not the psxNULL placeholder
265 for (i = 0; i < ARRAY_SIZE(gte_handlers); i++)
266 if (psxCP2[i] != psxNULL)
267 gte_handlers[i] = psxCP2[i];
269 #if !defined(DRC_DBG)
271 gte_handlers[0x06] = gteNCLIP_arm;
272 gte_handlers_nf[0x01] = gteRTPS_nf_arm;
273 gte_handlers_nf[0x30] = gteRTPT_nf_arm;
276 // compiler's _nf version is still a lot slower than neon
277 // _nf_arm RTPS is roughly the same, RTPT slower
278 gte_handlers[0x01] = gte_handlers_nf[0x01] = gteRTPS_neon;
279 gte_handlers[0x30] = gte_handlers_nf[0x30] = gteRTPT_neon;
// makes the _nf table identical to the regular handler table
283 memcpy(gte_handlers_nf, gte_handlers, sizeof(gte_handlers_nf));
286 zeromem_ptr = zero_mem;
/*
 * Reset hook: logs the call, resets the memory layer, invalidates all
 * pages and sets pending_exception.
 */
291 static void ari64_reset()
293 printf("ari64_reset\n");
294 new_dyna_pcsx_mem_reset();
295 invalidate_all_pages();
297 pending_exception = 1;
300 // execute until predefined leave points
301 // (HLE softcall exit and BIOS fastboot end)
// Refreshes next_interupt through schedule_timeslice() first, and traces
// pc / cycle / next_interupt before and after the run.
302 static void ari64_execute_until()
304 schedule_timeslice();
306 evprintf("ari64_execute %08x, %u->%u (%d)\n", psxRegs.pc,
307 psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle);
311 evprintf("ari64_execute end %08x, %u->%u (%d)\n", psxRegs.pc,
312 psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle);
/*
 * Execution entry point built on ari64_execute_until(); traces the pc each
 * time control comes back.
 * NOTE(review): presumably calls it in a loop - confirm.
 */
315 static void ari64_execute()
318 ari64_execute_until();
319 evprintf("drc left @%08x\n", psxRegs.pc);
/*
 * Invalidate the pages that cover a written memory range.
 *
 * addr: start address of the range.
 * size: length in 32-bit units (converted to bytes below).
 */
323 static void ari64_clear(u32 addr, u32 size)
325 u32 start, end, main_ram;
327 size *= 4; /* PCSX uses DMA units */
329 evprintf("ari64_clear %08x %04x\n", addr, size);
331 /* check for RAM mirrors */
// true only for addresses 0x80000000..0x801fffff
332 main_ram = (addr & 0xffe00000) == 0x80000000;
// page number (4 KiB pages) of the address just past the range
335 end = (addr + size) >> 12;
// NOTE(review): the bound is inclusive, so the page holding addr + size is
// processed even when the range ends exactly on a page boundary
337 for (; start <= end; start++)
// main-RAM pages already flagged in invalid_code[] are skipped; pages for
// any other address are always invalidated
338 if (!main_ram || !invalid_code[start])
339 invalidate_block(start);
/* Shutdown hook: releases the recompiler through new_dynarec_cleanup(). */
342 static void ari64_shutdown()
344 new_dynarec_cleanup();
/* Interpreter entry points defined elsewhere. */
347 extern void intExecute();
348 extern void intExecuteT();
349 extern void intExecuteBlock();
350 extern void intExecuteBlockT();
/* Alias the "T" names to the plain interpreter entry points.
 * NOTE(review): presumably inside a conditional for non-tracing builds -
 * confirm. */
352 #define intExecuteT intExecute
353 #define intExecuteBlockT intExecuteBlock
/* Empty stand-ins for the trace/compare hooks.
 * NOTE(review): presumably selected when the debug implementations further
 * down are compiled out - confirm. */
372 void do_insn_trace() {}
373 void do_insn_cmp() {}
/*
 * x86 / x86_64 builds: define the recompiler's globals here and provide
 * empty stand-ins for its entry points.
 * NOTE(review): presumably because the recompiler itself is not built for
 * these targets - confirm.
 */
376 #if defined(__x86_64__) || defined(__i386__)
377 unsigned int address;
378 int pending_exception, stop;
379 unsigned int next_interupt;
380 int new_dynarec_did_compile;
381 int cycle_multiplier;
382 int new_dynarec_hacks;
386 void new_dynarec_init() {}
387 void new_dyna_start() {}
388 void new_dynarec_cleanup() {}
389 void new_dynarec_clear_full() {}
390 void invalidate_all_pages() {}
391 void invalidate_block(unsigned int block) {}
392 void new_dyna_pcsx_mem_init(void) {}
393 void new_dyna_pcsx_mem_reset(void) {}
394 void new_dyna_pcsx_mem_load_state(void) {}
401 extern u32 last_io_addr;
/*
 * Debug helper: write `size` bytes starting at `mem` to the file `fname`.
 *
 * If `fname` cannot be opened for writing, the part after its last '/' is
 * tried instead (a file of the same name in the current directory).  When
 * neither can be opened the dump is skipped with a message instead of
 * handing a NULL stream to fwrite().
 */
static void dump_mem(const char *fname, void *mem, size_t size)
{
	FILE *f1 = fopen(fname, "wb");

	if (f1 == NULL) {
		/* a path without '/' has no shorter fallback name to try */
		const char *slash = strrchr(fname, '/');

		if (slash != NULL)
			f1 = fopen(slash + 1, "wb");
	}
	if (f1 == NULL) {
		printf("dump_mem: can't open %s\n", fname);
		return;
	}

	if (fwrite(mem, 1, size, f1) != size)
		printf("dump_mem: short write to %s\n", fname);
	fclose(f1);
}
/*
 * Read the 32-bit word containing address `a` from the matching backing
 * buffer; the low two address bits are masked off in every case.
 *   a >> 16 == 0x1f80  -> psxH, offset a & 0xfffc
 *   a >> 16 == 0x1f00  -> psxP, offset a & 0xfffc
 *   anything else      -> psxM, offset a & 0x1ffffc
 */
412 static u32 memcheck_read(u32 a)
414 if ((a >> 16) == 0x1f80)
416 return *(u32 *)(psxH + (a & 0xfffc));
418 if ((a >> 16) == 0x1f00)
420 return *(u32 *)(psxP + (a & 0xfffc));
422 // if ((a & ~0xe0600000) < 0x200000)
424 return *(u32 *)(psxM + (a & 0x1ffffc));
/*
 * Trace writer: appends to the file "tracelog" whatever changed since the
 * previous call - register words of psxRegs located before intCycle,
 * event_cycles[] entries, last_io_addr and the word read back from it.
 * The previous values are kept in function-local statics.
 * NOTE(review): the one-byte markers written before each record are
 * assigned on lines not visible in this view - confirm the record format
 * against do_insn_cmp().
 */
427 void do_insn_trace(void)
429 static psxRegisters oldregs;
430 static u32 old_io_addr = (u32)-1;
431 static u32 old_io_data = 0xbad0c0de;
432 static u32 event_cycles_o[PSXINT_COUNT];
// view both register structures as arrays of 32-bit words
433 u32 *allregs_p = (void *)&psxRegs;
434 u32 *allregs_o = (void *)&oldregs;
439 //last_io_addr = 0x5e2c8;
441 f = fopen("tracelog", "wb");
444 oldregs.code = psxRegs.code; // don't care
// emit every register word that differs from the remembered copy
445 for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
446 if (allregs_p[i] != allregs_o[i]) {
448 fwrite(&allregs_p[i], 1, 4, f);
449 allregs_o[i] = allregs_p[i];
// emit every event cycle that differs from the remembered copy
453 for (i = 0; i < PSXINT_COUNT; i++) {
454 if (event_cycles[i] != event_cycles_o[i]) {
456 fwrite(&byte, 1, 1, f);
458 fwrite(&event_cycles[i], 1, 4, f);
459 event_cycles_o[i] = event_cycles[i];
463 if (old_io_addr != last_io_addr) {
465 fwrite(&byte, 1, 1, f);
466 fwrite(&last_io_addr, 1, 4, f);
467 old_io_addr = last_io_addr;
469 io_data = memcheck_read(last_io_addr);
470 if (old_io_data != io_data) {
472 fwrite(&byte, 1, 1, f);
473 fwrite(&io_data, 1, 4, f);
474 old_io_data = io_data;
477 fwrite(&byte, 1, 1, f);
// hard-coded debugging aid: dump memory at one specific cycle count
480 if (psxRegs.cycle == 190230) {
481 dump_mem("/mnt/ntz/dev/pnd/tmp/psxram_i.dump", psxM, 0x200000);
482 dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs_i.dump", psxH, 0x10000);
/*
 * Printable names for the 32-bit words of psxRegisters that precede
 * intCycle; do_insn_cmp() indexes this table with the word index of a
 * mismatching register.
 */
489 static const char *regnames[offsetof(psxRegisters, intCycle) / 4] = {
490 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
491 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
492 "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
493 "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
495 "C0_0", "C0_1", "C0_2", "C0_3", "C0_4", "C0_5", "C0_6", "C0_7",
496 "C0_8", "C0_9", "C0_10", "C0_11", "C0_12", "C0_13", "C0_14", "C0_15",
497 "C0_16", "C0_17", "C0_18", "C0_19", "C0_20", "C0_21", "C0_22", "C0_23",
498 "C0_24", "C0_25", "C0_26", "C0_27", "C0_28", "C0_29", "C0_30", "C0_31",
500 "C2D0", "C2D1", "C2D2", "C2D3", "C2D4", "C2D5", "C2D6", "C2D7",
501 "C2D8", "C2D9", "C2D10", "C2D11", "C2D12", "C2D13", "C2D14", "C2D15",
502 "C2D16", "C2D17", "C2D18", "C2D19", "C2D20", "C2D21", "C2D22", "C2D23",
503 "C2D24", "C2D25", "C2D26", "C2D27", "C2D28", "C2D29", "C2D30", "C2D31",
505 "C2C0", "C2C1", "C2C2", "C2C3", "C2C4", "C2C5", "C2C6", "C2C7",
506 "C2C8", "C2C9", "C2C10", "C2C11", "C2C12", "C2C13", "C2C14", "C2C15",
507 "C2C16", "C2C17", "C2C18", "C2C19", "C2C20", "C2C21", "C2C22", "C2C23",
508 "C2C24", "C2C25", "C2C26", "C2C27", "C2C28", "C2C29", "C2C30", "C2C31",
510 "PC", "code", "cycle", "interrupt",
/* Index of the next miss_log[] slot to fill. */
518 static int miss_log_i;
/* Element count of miss_log[]. */
519 #define miss_log_len (sizeof(miss_log)/sizeof(miss_log[0]))
/* Used with & to wrap the index, which requires miss_log_len to be a
 * power of two. */
520 #define miss_log_mask (miss_log_len-1)
/*
 * Record one register mismatch in the miss_log[] ring.
 *
 * reg:        index of the mismatching register word
 * val:        value that was found
 * val_expect: value that was expected
 * pc, cycle:  pc and cycle count at the time of the mismatch
 */
522 static void miss_log_add(int reg, u32 val, u32 val_expect, u32 pc, u32 cycle)
524 miss_log[miss_log_i].reg = reg;
525 miss_log[miss_log_i].val = val;
526 miss_log[miss_log_i].val_expect = val_expect;
527 miss_log[miss_log_i].pc = pc;
528 miss_log[miss_log_i].cycle = cycle;
// advance and wrap, so the oldest entry is overwritten next
529 miss_log_i = (miss_log_i + 1) & miss_log_mask;
/*
 * Trace checker: reads records from the file "tracelog" into an expected
 * register image (rregs), an expected memory address/value pair and an
 * expected event cycle, then compares them with the live state.
 * Mismatching register words go to the miss log; memory and event-cycle
 * mismatches are printed.  The final part prints the miss log and the
 * register file and dumps memory with dump_mem().
 * NOTE(review): the control flow between the comparison and the final
 * report (when it returns early, when it stops) is on lines not visible in
 * this view - confirm.
 */
534 void do_insn_cmp(void)
536 static psxRegisters rregs;
537 static u32 mem_addr, mem_val;
// view the live and the expected register structures as 32-bit words
538 u32 *allregs_p = (void *)&psxRegs;
539 u32 *allregs_e = (void *)&rregs;
540 static u32 ppc, failcount;
541 int i, ret, bad = 0, which_event = -1;
546 f = fopen("tracelog", "rb");
549 if ((ret = fread(&code, 1, 1, f)) <= 0)
// NOTE(review): only one byte of the int which_event is overwritten; it was
// initialised to -1, so the remaining bytes stay 0xff and on little-endian
// targets the value stays negative, making the which_event >= 0 check below
// unreachable - consider reading into an unsigned char first
558 fread(&which_event, 1, 1, f);
559 fread(&ev_cycles, 1, 4, f);
562 fread(&mem_addr, 1, 4, f);
565 fread(&mem_val, 1, 4, f);
568 fread(&allregs_e[code], 1, 4, f);
// these fields are taken from the trace rather than compared
576 psxRegs.code = rregs.code; // don't care
577 psxRegs.cycle = rregs.cycle;
578 psxRegs.CP0.r[9] = rregs.CP0.r[9]; // Count
580 //if (psxRegs.cycle == 166172) breakme();
582 if (memcmp(&psxRegs, &rregs, offsetof(psxRegisters, intCycle)) == 0 &&
583 mem_val == memcheck_read(mem_addr)
// log every register word that differs from the expected image
589 for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
590 if (allregs_p[i] != allregs_e[i]) {
591 miss_log_add(i, allregs_p[i], allregs_e[i], psxRegs.pc, psxRegs.cycle);
596 if (mem_val != memcheck_read(mem_addr)) {
597 printf("bad mem @%08x: %08x %08x\n", mem_addr, memcheck_read(mem_addr), mem_val);
601 if (which_event >= 0 && event_cycles[which_event] != ev_cycles) {
602 printf("bad ev_cycles #%d: %08x %08x\n", which_event, event_cycles[which_event], ev_cycles);
// same pc and only a limited number of mismatches so far
606 if (psxRegs.pc == rregs.pc && bad < 6 && failcount < 32) {
607 static int last_mcycle;
// print the cycle count once each time cycle >> 20 changes
608 if (last_mcycle != psxRegs.cycle >> 20) {
609 printf("%u\n", psxRegs.cycle);
610 last_mcycle = psxRegs.cycle >> 20;
// walk the whole ring once, starting at the current write position
617 for (i = 0; i < miss_log_len; i++, miss_log_i = (miss_log_i + 1) & miss_log_mask)
618 printf("bad %5s: %08x %08x, pc=%08x, cycle %u\n",
619 regnames[miss_log[miss_log_i].reg], miss_log[miss_log_i].val,
620 miss_log[miss_log_i].val_expect, miss_log[miss_log_i].pc, miss_log[miss_log_i].cycle);
621 printf("-- %d\n", bad);
// first 32 register words, four columns per row
622 for (i = 0; i < 8; i++)
623 printf("r%d=%08x r%2d=%08x r%2d=%08x r%2d=%08x\n", i, allregs_p[i],
624 i+8, allregs_p[i+8], i+16, allregs_p[i+16], i+24, allregs_p[i+24]);
625 printf("PC: %08x/%08x, cycle %u\n", psxRegs.pc, ppc, psxRegs.cycle);
626 dump_mem("/mnt/ntz/dev/pnd/tmp/psxram.dump", psxM, 0x200000);
627 dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs.dump", psxH, 0x10000);
630 psxRegs.cycle = rregs.cycle + 2; // sync timing