2 * (C) Gražvydas "notaz" Ignotas, 2010-2011
4 * This work is licensed under the terms of GNU GPL version 2 or later.
5 * See the COPYING file in the top-level directory.
12 #include "../psxhle.h"
13 #include "../r3000a.h"
15 #include "../psxdma.h"
17 #include "../gte_arm.h"
18 #include "../gte_neon.h"
/*
 * Number of elements in array x. x must be a real array (not a pointer);
 * the argument is parenthesized so that any array-valued expression works.
 */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
24 //#define evprintf printf
/* Per-block flag array; ari64_clear() indexes it with (address >> 12). */
27 char invalid_code[0x100000];
/*
 * Absolute cycle at which each PSXINT_* event is due; compared against
 * psxRegs.cycle by schedule_timeslice() and irq_test().
 */
28 u32 event_cycles[PSXINT_COUNT];
/*
 * Choose the next stopping point for execution and store it in
 * next_interupt. Candidates are measured as distances from the current
 * psxRegs.cycle: first the counter deadline
 * (psxNextsCounter + psxNextCounter), then the entries of event_cycles[].
 */
30 static void schedule_timeslice(void)
32 u32 i, c = psxRegs.cycle;
// initial candidate: distance from now to the counter deadline
35 min = psxNextsCounter + psxNextCounter - c;
36 for (i = 0; i < ARRAY_SIZE(event_cycles); i++) {
37 dif = event_cycles[i] - c;
38 //evprintf(" ev %d\n", dif);
// only an event strictly in the future and nearer than the current candidate qualifies
39 if (0 < dif && dif < min)
// turn the chosen distance back into an absolute cycle count
42 next_interupt = c + min;
// debug statistics on the cycle distance between successive calls;
// the running average (sum / cnt) is printed whenever the low 8 bits of cnt are zero
45 static u32 cnt, last_cycle;
49 sum += psxRegs.cycle - last_cycle;
50 if ((cnt & 0xff) == 0)
51 printf("%u\n", (u32)(sum / cnt));
53 last_cycle = psxRegs.cycle;
/* Function type of the event handlers stored in irq_funcs[]. */
57 typedef void (irq_func)();
/* Event handler table, indexed by PSXINT_* id. */
59 static irq_func * const irq_funcs[] = {
60 [PSXINT_SIO] = sioInterrupt,
61 [PSXINT_CDR] = cdrInterrupt,
62 [PSXINT_CDREAD] = cdrReadInterrupt,
63 [PSXINT_GPUDMA] = gpuInterrupt,
64 [PSXINT_MDECOUTDMA] = mdec1Interrupt,
65 [PSXINT_SPUDMA] = spuInterrupt,
66 [PSXINT_MDECINDMA] = mdec0Interrupt,
67 [PSXINT_GPUOTCDMA] = gpuotcInterrupt,
68 [PSXINT_CDRDMA] = cdrDmaInterrupt,
69 [PSXINT_CDRLID] = cdrLidSeekInterrupt,
70 [PSXINT_CDRPLAY] = cdrPlayInterrupt,
73 /* local dupe of psxBranchTest, using event_cycles */
/*
 * Checks the counter deadline, walks the pending bits of
 * psxRegs.interrupt looking for events whose event_cycles[] time has been
 * reached, and finally raises exception 0x400 when the hardware interrupt
 * registers and Status allow it.
 */
74 static void irq_test(void)
// snapshot of the pending-event bitmask and of the current cycle
76 u32 irqs = psxRegs.interrupt;
77 u32 cycle = psxRegs.cycle;
// counter deadline reached (unsigned distance since psxNextsCounter)
80 if ((psxRegs.cycle - psxNextsCounter) >= psxNextCounter)
83 // irq_funcs() may queue more irqs
84 psxRegs.interrupt = 0;
// walk the snapshot one bit at a time; irq is the index of the bit under test
86 for (irq = 0, irq_bits = irqs; irq_bits != 0; irq++, irq_bits >>= 1) {
// signed difference: true once `cycle` has reached event_cycles[irq]
89 if ((s32)(cycle - event_cycles[irq]) >= 0) {
// fold irqs back in without losing anything queued since the field was cleared
94 psxRegs.interrupt |= irqs;
// some bit is set in both psxHu32(0x1070) and psxHu32(0x1074), and Status has both 0x401 bits set
96 if ((psxHu32(0x1070) & psxHu32(0x1074)) && (Status & 0x401) == 0x401) {
97 psxException(0x400, 0);
98 pending_exception = 1;
104 evprintf(" +ge %08x, %u->%u\n", psxRegs.pc, psxRegs.cycle, next_interupt);
108 //pending_exception = 1;
110 schedule_timeslice();
112 evprintf(" -ge %08x, %u->%u (%d)\n", psxRegs.pc, psxRegs.cycle,
113 next_interupt, next_interupt - psxRegs.cycle);
117 extern void MTC0(int reg, u32 val);
/*
 * MTC0 hook taking a register number and the value to write; traces both,
 * together with the current PC and cycle, through evprintf.
 * NOTE(review): presumably forwards to MTC0() declared above - confirm.
 */
119 void pcsx_mtc0(u32 reg, u32 val)
121 evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
/*
 * Variant of pcsx_mtc0() with the same parameters and the same evprintf trace.
 * NOTE(review): the "_ds" suffix presumably means "delay slot" - confirm.
 */
126 void pcsx_mtc0_ds(u32 reg, u32 val)
128 evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
/* Save-state hook for the dynarec side. */
132 void new_dyna_save(void)
134 // psxRegs.intCycle is always maintained, no need to convert
/*
 * Load-state hook: rebuilds event_cycles[] from psxRegs.intCycle[] and then
 * lets the memory layer reload its own state.
 */
137 void new_dyna_restore(void)
140 for (i = 0; i < PSXINT_COUNT; i++)
// absolute due time = start cycle + delay
141 event_cycles[i] = psxRegs.intCycle[i].sCycle + psxRegs.intCycle[i].cycle;
143 new_dyna_pcsx_mem_load_state();
/* GTE operation handlers, 64 slots; populated at runtime by ari64_init(). */
147 void *gte_handlers[64];
/*
 * "_nf" handler variants in the same 64-slot layout; NULL marks a slot
 * without a handler. The trailing comment on each row is the hexadecimal
 * index of the row's first slot.
 */
149 void *gte_handlers_nf[64] = {
150 NULL , gteRTPS_nf , NULL , NULL , NULL , NULL , gteNCLIP_nf, NULL , // 00
151 NULL , NULL , NULL , NULL , gteOP_nf , NULL , NULL , NULL , // 08
152 gteDPCS_nf, gteINTPL_nf, gteMVMVA_nf, gteNCDS_nf, gteCDP_nf, NULL , gteNCDT_nf , NULL , // 10
153 NULL , NULL , NULL , gteNCCS_nf, gteCC_nf , NULL , gteNCS_nf , NULL , // 18
154 gteNCT_nf , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20
155 gteSQR_nf , gteDCPL_nf , gteDPCT_nf , NULL , NULL , gteAVSZ3_nf, gteAVSZ4_nf, NULL , // 28
156 gteRTPT_nf, NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30
157 NULL , NULL , NULL , NULL , NULL , gteGPF_nf , gteGPL_nf , gteNCCT_nf, // 38
/*
 * Mnemonic strings laid out in the same 64-slot order as gte_handlers_nf
 * (NULL where that table has no handler). Despite the name, the entries
 * are operation mnemonics such as "RTPS", not register names.
 */
160 const char *gte_regnames[64] = {
161 NULL , "RTPS" , NULL , NULL , NULL , NULL , "NCLIP", NULL , // 00
162 NULL , NULL , NULL , NULL , "OP" , NULL , NULL , NULL , // 08
163 "DPCS", "INTPL", "MVMVA", "NCDS", "CDP", NULL , "NCDT" , NULL , // 10
164 NULL , NULL , NULL , "NCCS", "CC" , NULL , "NCS" , NULL , // 18
165 "NCT" , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20
166 "SQR" , "DCPL" , "DPCT" , NULL , NULL , "AVSZ3", "AVSZ4", NULL , // 28
167 "RTPT", NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30
168 NULL , NULL , NULL , NULL , NULL , "GPF" , "GPL" , "NCCT", // 38
171 /* from gte.txt.. not sure if this is any good. */
/*
 * One value per slot of the 64-entry GTE layout, 16 slots per row; the
 * nonzero entries coincide with the populated slots of gte_handlers_nf.
 * NOTE(review): presumably per-operation cycle costs, going by the name - confirm.
 */
172 const char gte_cycletab[64] = {
173 /* 1 2 3 4 5 6 7 8 9 a b c d e f */
174 0, 15, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 6, 0, 0, 0,
175 8, 8, 8, 19, 13, 0, 44, 0, 0, 0, 0, 17, 11, 0, 14, 0,
176 30, 0, 0, 0, 0, 0, 0, 0, 5, 8, 17, 0, 0, 5, 6, 0,
177 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 39,
/*
 * Helpers that OR several single-bit masks together. GCBITS3 combines
 * three GCBIT() masks; each GDBITSn is GDBITS(n-1) plus one more GDBIT().
 * They are used to build the gte_reg_reads / gte_reg_writes tables.
 */
184 #define GCBITS3(b0,b1,b2) \
185 (GCBIT(b0) | GCBIT(b1) | GCBIT(b2))
186 #define GDBITS2(b0,b1) \
187 (GDBIT(b0) | GDBIT(b1))
188 #define GDBITS3(b0,b1,b2) \
189 (GDBITS2(b0,b1) | GDBIT(b2))
190 #define GDBITS4(b0,b1,b2,b3) \
191 (GDBITS3(b0,b1,b2) | GDBIT(b3))
192 #define GDBITS5(b0,b1,b2,b3,b4) \
193 (GDBITS4(b0,b1,b2,b3) | GDBIT(b4))
194 #define GDBITS6(b0,b1,b2,b3,b4,b5) \
195 (GDBITS5(b0,b1,b2,b3,b4) | GDBIT(b5))
196 #define GDBITS7(b0,b1,b2,b3,b4,b5,b6) \
197 (GDBITS6(b0,b1,b2,b3,b4,b5) | GDBIT(b6))
198 #define GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) \
199 (GDBITS7(b0,b1,b2,b3,b4,b5,b6) | GDBIT(b7))
200 #define GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) \
201 (GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) | GDBIT(b8))
202 #define GDBITS10(b0,b1,b2,b3,b4,b5,b6,b7,b8,b9) \
203 (GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) | GDBIT(b9))
/*
 * 64-bit mask per GTE operation (indexed by GTE_* id) built from
 * GDBIT()/GCBIT() helpers and literal constants occupying the upper 32 bits.
 * NOTE(review): presumably bit n marks GTE register n as an input of the
 * operation (data registers via GDBIT, control registers via GCBIT) - confirm
 * against the GDBIT/GCBIT definitions.
 */
205 const uint64_t gte_reg_reads[64] = {
206 [GTE_RTPS] = 0x1f0000ff00000000ll | GDBITS7(0,1,13,14,17,18,19),
207 [GTE_NCLIP] = GDBITS3(12,13,14),
208 [GTE_OP] = GCBITS3(0,2,4) | GDBITS3(9,10,11),
209 [GTE_DPCS] = GCBITS3(21,22,23) | GDBITS4(6,8,21,22),
210 [GTE_INTPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22),
211 [GTE_MVMVA] = 0x00ffffff00000000ll | GDBITS6(0,1,2,3,4,5), // XXX: maybe decode further?
212 [GTE_NCDS] = 0x00ffff0000000000ll | GDBITS5(0,1,6,21,22),
213 [GTE_CDP] = 0x00fff00000000000ll | GDBITS7(6,8,9,10,11,21,22),
214 [GTE_NCDT] = 0x00ffff0000000000ll | GDBITS8(0,1,2,3,4,5,6,8),
215 [GTE_NCCS] = 0x00ffff0000000000ll | GDBITS6(0,1,6,8,21,22),
216 [GTE_CC] = 0x001fe00000000000ll | GDBITS6(6,9,10,11,21,22),
217 [GTE_NCS] = 0x001fff0000000000ll | GDBITS4(0,1,21,22),
218 [GTE_NCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
219 [GTE_SQR] = GDBITS3(9,10,11),
220 [GTE_DCPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22),
221 [GTE_DPCT] = GCBITS3(21,22,23) | GDBITS4(8,20,21,22),
222 [GTE_AVSZ3] = GCBIT(29) | GDBITS3(17,18,19),
223 [GTE_AVSZ4] = GCBIT(30) | GDBITS4(16,17,18,19),
224 [GTE_RTPT] = 0x1f0000ff00000000ll | GDBITS7(0,1,2,3,4,5,19),
225 [GTE_GPF] = GDBITS7(6,8,9,10,11,21,22),
226 [GTE_GPL] = GDBITS10(6,8,9,10,11,21,22,25,26,27),
227 [GTE_NCCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
230 // note: this excludes gteFLAG that is always written to
/*
 * 64-bit mask per GTE operation (indexed by GTE_* id), in the same
 * encoding as gte_reg_reads; every entry here uses only GDBIT() masks or
 * a literal confined to the lower 32 bits.
 * NOTE(review): presumably bit n marks data register n as an output of the
 * operation - confirm against the GDBIT definition.
 */
231 const uint64_t gte_reg_writes[64] = {
232 [GTE_RTPS] = 0x0f0f7f00ll,
233 [GTE_NCLIP] = GDBIT(24),
234 [GTE_OP] = GDBITS6(9,10,11,25,26,27),
235 [GTE_DPCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
236 [GTE_INTPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
237 [GTE_MVMVA] = GDBITS6(9,10,11,25,26,27),
238 [GTE_NCDS] = GDBITS9(9,10,11,20,21,22,25,26,27),
239 [GTE_CDP] = GDBITS9(9,10,11,20,21,22,25,26,27),
240 [GTE_NCDT] = GDBITS9(9,10,11,20,21,22,25,26,27),
241 [GTE_NCCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
242 [GTE_CC] = GDBITS9(9,10,11,20,21,22,25,26,27),
243 [GTE_NCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
244 [GTE_NCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
245 [GTE_SQR] = GDBITS6(9,10,11,25,26,27),
246 [GTE_DCPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
247 [GTE_DPCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
248 [GTE_AVSZ3] = GDBITS2(7,24),
249 [GTE_AVSZ4] = GDBITS2(7,24),
250 [GTE_RTPT] = 0x0f0f7f00ll,
251 [GTE_GPF] = GDBITS9(9,10,11,20,21,22,25,26,27),
252 [GTE_GPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
253 [GTE_NCCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
/*
 * Init hook: sets up the memory layer, seeds gte_handlers[] from psxCP2[],
 * installs the ARM/NEON GTE routines selected below and points zeromem_ptr
 * at zero_mem.
 */
256 static int ari64_init()
258 extern void (*psxCP2[64])();
259 extern void psxNULL();
263 new_dyna_pcsx_mem_init();
// take over every psxCP2[] entry that is not the psxNULL placeholder
265 for (i = 0; i < ARRAY_SIZE(gte_handlers); i++)
266 if (psxCP2[i] != psxNULL)
267 gte_handlers[i] = psxCP2[i];
// the hand-written replacements are only considered when DRC_DBG is not defined
269 #if !defined(DRC_DBG)
271 gte_handlers[0x06] = gteNCLIP_arm;
272 gte_handlers_nf[0x01] = gteRTPS_nf_arm;
273 gte_handlers_nf[0x30] = gteRTPT_nf_arm;
276 // compiler's _nf version is still a lot slower than neon
277 // _nf_arm RTPS is roughly the same, RTPT slower
278 gte_handlers[0x01] = gte_handlers_nf[0x01] = gteRTPS_neon;
279 gte_handlers[0x30] = gte_handlers_nf[0x30] = gteRTPT_neon;
// overwrite the _nf table with the contents of gte_handlers
// NOTE(review): presumably only in the DRC_DBG build - confirm the surrounding #if/#else
283 memcpy(gte_handlers_nf, gte_handlers, sizeof(gte_handlers_nf));
286 zeromem_ptr = zero_mem;
/*
 * Reset hook: logs the call, resets the memory layer, invalidates all
 * pages and flags a pending exception.
 */
291 static void ari64_reset()
293 printf("ari64_reset\n");
294 new_dyna_pcsx_mem_reset();
295 invalidate_all_pages();
297 pending_exception = 1;
300 // execute until predefined leave points
301 // (HLE softcall exit and BIOS fastboot end)
// Recomputes next_interupt via schedule_timeslice() first, and traces
// PC / cycle / next_interupt through evprintf on entry and on exit.
302 static void ari64_execute_until()
304 schedule_timeslice();
306 evprintf("ari64_execute %08x, %u->%u (%d)\n", psxRegs.pc,
307 psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle);
311 evprintf("ari64_execute end %08x, %u->%u (%d)\n", psxRegs.pc,
312 psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle);
/* Execute hook: runs ari64_execute_until() and traces the PC at which it returned. */
315 static void ari64_execute()
318 ari64_execute_until();
319 evprintf("drc left @%08x\n", psxRegs.pc);
/*
 * Invalidate the blocks covering a memory range.
 *
 * addr - start address of the range
 * size - length in 32-bit words (multiplied by 4 below to obtain bytes)
 */
323 static void ari64_clear(u32 addr, u32 size)
325 u32 start, end, main_ram;
327 size *= 4; /* PCSX uses DMA units */
329 evprintf("ari64_clear %08x %04x\n", addr, size);
331 /* check for RAM mirrors */
// nonzero only when addr lies in the 2 MiB window starting at 0x80000000
332 main_ram = (addr & 0xffe00000) == 0x80000000;
// block index of the end of the range (one index per 4 KiB); the bound is inclusive in the loop below
335 end = (addr + size) >> 12;
337 for (; start <= end; start++)
// inside that window, blocks whose invalid_code[] flag is already set are skipped
338 if (!main_ram || !invalid_code[start])
339 invalidate_block(start);
/* Shutdown hook: releases the dynarec via new_dynarec_cleanup(). */
342 static void ari64_shutdown()
344 new_dynarec_cleanup();
347 extern void intExecute();
348 extern void intExecuteT();
349 extern void intExecuteBlock();
350 extern void intExecuteBlockT();
352 #define intExecuteT intExecute
353 #define intExecuteBlockT intExecuteBlock
/*
 * No-op versions of the trace/compare hooks.
 * NOTE(review): full implementations with the same names exist further down,
 * so the two sets are presumably selected by a preprocessor condition - confirm.
 */
372 void do_insn_trace() {}
373 void do_insn_cmp() {}
/*
 * x86 / x86-64 builds: the symbols below are defined here as plain
 * variables and empty functions.
 * NOTE(review): presumably because the real recompiler is not built for
 * these targets - confirm.
 */
376 #if defined(__x86_64__) || defined(__i386__)
377 unsigned int address;
378 int pending_exception, stop;
379 unsigned int next_interupt;
380 int new_dynarec_did_compile;
381 int cycle_multiplier;
382 int new_dynarec_hacks;
// empty stand-ins for the recompiler entry points
386 void new_dynarec_init() {}
387 void new_dyna_start() {}
388 void new_dynarec_cleanup() {}
389 void new_dynarec_clear_full() {}
390 void invalidate_all_pages() {}
391 void invalidate_block(unsigned int block) {}
// empty stand-ins for the memory-layer hooks
392 void new_dyna_pcsx_mem_init(void) {}
393 void new_dyna_pcsx_mem_reset(void) {}
394 void new_dyna_pcsx_mem_load_state(void) {}
401 extern u32 last_io_addr;
/*
 * Write `size` bytes starting at `mem` to the file `fname` (debug helper).
 *
 * If `fname` cannot be opened, the file name without its directory part is
 * tried in the current directory instead. When neither can be opened, or
 * the write comes up short, a message is printed to stderr and the function
 * returns without touching anything else.
 *
 * fname - destination path
 * mem   - start of the memory to dump
 * size  - number of bytes to write
 */
static void dump_mem(const char *fname, void *mem, size_t size)
{
	const char *base;
	FILE *f1 = fopen(fname, "wb");

	if (f1 == NULL) {
		/* strrchr() returns NULL when fname has no '/': nothing to fall back to */
		base = strrchr(fname, '/');
		if (base != NULL)
			f1 = fopen(base + 1, "wb");
	}
	if (f1 == NULL) {
		fprintf(stderr, "dump_mem: can't open %s\n", fname);
		return;
	}

	if (fwrite(mem, 1, size, f1) != size)
		fprintf(stderr, "dump_mem: short write to %s\n", fname);
	fclose(f1);
}
/*
 * Fetch the 32-bit word containing address `a` from the backing buffer
 * selected by the upper 16 address bits. The offset is masked so that its
 * two low bits are always clear.
 */
412 static u32 memcheck_read(u32 a)
// 0x1f80xxxx: read from psxH
414 if ((a >> 16) == 0x1f80)
416 return *(u32 *)(psxH + (a & 0xfffc));
// 0x1f00xxxx: read from psxP
418 if ((a >> 16) == 0x1f00)
420 return *(u32 *)(psxP + (a & 0xfffc));
422 // if ((a & ~0xe0600000) < 0x200000)
// remaining case: read from psxM, offset reduced to the low 21 bits
424 return *(u32 *)(psxM + (a & 0x1ffffc));
/*
 * Append the state changes since the previous call to the binary file
 * "tracelog": changed register words, a changed last_io_addr, and a changed
 * value of the memory word at that address. do_insn_cmp() reads the same
 * file back.
 */
427 void do_insn_trace(void)
// state as of the previous call, used to emit only what changed
429 static psxRegisters oldregs;
430 static u32 old_io_addr = (u32)-1;
431 static u32 old_io_data = 0xbad0c0de;
// both register files viewed as flat arrays of 32-bit words
432 u32 *allregs_p = (void *)&psxRegs;
433 u32 *allregs_o = (void *)&oldregs;
438 //last_io_addr = 0x5e2c8;
440 f = fopen("tracelog", "wb");
442 oldregs.code = psxRegs.code; // don't care
// compare every 32-bit word located before the intCycle member
443 for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
444 if (allregs_p[i] != allregs_o[i]) {
446 fwrite(&allregs_p[i], 1, 4, f);
447 allregs_o[i] = allregs_p[i];
// I/O address changed: one byte followed by the 4-byte address
450 if (old_io_addr != last_io_addr) {
452 fwrite(&byte, 1, 1, f);
453 fwrite(&last_io_addr, 1, 4, f);
454 old_io_addr = last_io_addr;
// word at that address changed: one byte followed by the 4-byte value
456 io_data = memcheck_read(last_io_addr);
457 if (old_io_data != io_data) {
459 fwrite(&byte, 1, 1, f);
460 fwrite(&io_data, 1, 4, f);
461 old_io_data = io_data;
// single byte written on every call
464 fwrite(&byte, 1, 1, f);
// debugging aid: dump psxM and psxH at one hard-coded cycle count
467 if (psxRegs.cycle == 190230) {
468 dump_mem("/mnt/ntz/dev/pnd/tmp/psxram_i.dump", psxM, 0x200000);
469 dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs_i.dump", psxH, 0x10000);
/*
 * Printable name for each 32-bit word of psxRegisters that precedes the
 * intCycle member; do_insn_cmp() indexes it with the word index of a
 * mismatching register.
 */
476 static const char *regnames[offsetof(psxRegisters, intCycle) / 4] = {
477 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
478 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
479 "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
480 "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
482 "C0_0", "C0_1", "C0_2", "C0_3", "C0_4", "C0_5", "C0_6", "C0_7",
483 "C0_8", "C0_9", "C0_10", "C0_11", "C0_12", "C0_13", "C0_14", "C0_15",
484 "C0_16", "C0_17", "C0_18", "C0_19", "C0_20", "C0_21", "C0_22", "C0_23",
485 "C0_24", "C0_25", "C0_26", "C0_27", "C0_28", "C0_29", "C0_30", "C0_31",
487 "C2D0", "C2D1", "C2D2", "C2D3", "C2D4", "C2D5", "C2D6", "C2D7",
488 "C2D8", "C2D9", "C2D10", "C2D11", "C2D12", "C2D13", "C2D14", "C2D15",
489 "C2D16", "C2D17", "C2D18", "C2D19", "C2D20", "C2D21", "C2D22", "C2D23",
490 "C2D24", "C2D25", "C2D26", "C2D27", "C2D28", "C2D29", "C2D30", "C2D31",
492 "C2C0", "C2C1", "C2C2", "C2C3", "C2C4", "C2C5", "C2C6", "C2C7",
493 "C2C8", "C2C9", "C2C10", "C2C11", "C2C12", "C2C13", "C2C14", "C2C15",
494 "C2C16", "C2C17", "C2C18", "C2C19", "C2C20", "C2C21", "C2C22", "C2C23",
495 "C2C24", "C2C25", "C2C26", "C2C27", "C2C28", "C2C29", "C2C30", "C2C31",
497 "PC", "code", "cycle", "interrupt",
/* Index of the miss_log[] slot that the next miss_log_add() call fills. */
505 static int miss_log_i;
/* Element count of miss_log[], and the mask used to wrap the index. */
506 #define miss_log_len (sizeof(miss_log)/sizeof(miss_log[0]))
507 #define miss_log_mask (miss_log_len-1)
/*
 * Record one register mismatch in miss_log[] and advance the write index.
 *
 * reg        - register word index
 * val        - value found
 * val_expect - value that was expected
 * pc, cycle  - program counter and cycle count to store with the entry
 */
509 static void miss_log_add(int reg, u32 val, u32 val_expect, u32 pc, u32 cycle)
511 miss_log[miss_log_i].reg = reg;
512 miss_log[miss_log_i].val = val;
513 miss_log[miss_log_i].val_expect = val_expect;
514 miss_log[miss_log_i].pc = pc;
515 miss_log[miss_log_i].cycle = cycle;
// wrap around with the mask; this is only correct when miss_log_len is a power of two
516 miss_log_i = (miss_log_i + 1) & miss_log_mask;
/*
 * Counterpart of do_insn_trace(): reads "tracelog" back, rebuilds the
 * expected register/memory state from it and compares that with the live
 * psxRegs and memory. Mismatches are stored via miss_log_add(); the code at
 * the end prints the miss log and the registers and dumps psxM/psxH.
 */
521 void do_insn_cmp(void)
// expected state, rebuilt incrementally from the trace
523 static psxRegisters rregs;
524 static u32 mem_addr, mem_val;
// live and expected register files viewed as flat arrays of 32-bit words
525 u32 *allregs_p = (void *)&psxRegs;
526 u32 *allregs_e = (void *)&rregs;
527 static u32 ppc, failcount;
532 f = fopen("tracelog", "rb");
// every record starts with a one-byte code
535 if ((ret = fread(&code, 1, 1, f)) <= 0)
// 4-byte payloads: the watched memory address, the watched memory value,
// or the expected register word selected by `code`
542 if ((ret = fread(&mem_addr, 1, 4, f)) <= 0)
547 if ((ret = fread(&mem_val, 1, 4, f)) <= 0)
551 if ((ret = fread(&allregs_e[code], 1, 4, f)) <= 0)
// these fields are copied from the expected state so they never count as mismatches
560 psxRegs.code = rregs.code; // don't care
561 psxRegs.cycle = rregs.cycle;
562 psxRegs.CP0.r[9] = rregs.CP0.r[9]; // Count
564 //if (psxRegs.cycle == 166172) breakme();
// match: all register words before intCycle are equal and the watched memory word agrees
566 if (memcmp(&psxRegs, &rregs, offsetof(psxRegisters, intCycle)) == 0 &&
567 mem_val == memcheck_read(mem_addr)
// otherwise log every differing register word
573 for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
574 if (allregs_p[i] != allregs_e[i]) {
575 miss_log_add(i, allregs_p[i], allregs_e[i], psxRegs.pc, psxRegs.cycle);
580 if (mem_val != memcheck_read(mem_addr)) {
581 printf("bad mem @%08x: %08x %08x\n", mem_addr, memcheck_read(mem_addr), mem_val);
// NOTE(review): looks like a limited number of small mismatches is tolerated
// while the PC still agrees - confirm
585 if (psxRegs.pc == rregs.pc && bad < 6 && failcount < 32) {
// print the cycle count only when (cycle >> 20) changes
586 static int last_mcycle;
587 if (last_mcycle != psxRegs.cycle >> 20) {
588 printf("%u\n", psxRegs.cycle);
589 last_mcycle = psxRegs.cycle >> 20;
// print all miss-log entries, starting at the current write index and wrapping around
596 for (i = 0; i < miss_log_len; i++, miss_log_i = (miss_log_i + 1) & miss_log_mask)
597 printf("bad %5s: %08x %08x, pc=%08x, cycle %u\n",
598 regnames[miss_log[miss_log_i].reg], miss_log[miss_log_i].val,
599 miss_log[miss_log_i].val_expect, miss_log[miss_log_i].pc, miss_log[miss_log_i].cycle);
600 printf("-- %d\n", bad);
// the first 32 register words, four per output line
601 for (i = 0; i < 8; i++)
602 printf("r%d=%08x r%2d=%08x r%2d=%08x r%2d=%08x\n", i, allregs_p[i],
603 i+8, allregs_p[i+8], i+16, allregs_p[i+16], i+24, allregs_p[i+24]);
604 printf("PC: %08x/%08x, cycle %u\n", psxRegs.pc, ppc, psxRegs.cycle);
605 dump_mem("/mnt/ntz/dev/pnd/tmp/psxram.dump", psxM, 0x200000);
606 dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs.dump", psxH, 0x10000);
609 psxRegs.cycle = rregs.cycle + 2; // sync timing