2 * (C) Gražvydas "notaz" Ignotas, 2010-2011
4 * This work is licensed under the terms of GNU GPL version 2 or later.
5 * See the COPYING file in the top-level directory.
13 #include "../psxhle.h"
14 #include "../psxinterpreter.h"
15 #include "../r3000a.h"
16 #include "../gte_arm.h"
17 #include "../gte_neon.h"
/*
 * Number of elements in the array `x`.
 * Only valid for true arrays (not pointers or array parameters).
 * The argument is parenthesised in both uses so that expression arguments
 * expand safely.
 */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
23 //#define evprintf printf
/*
 * MTC0 handler: traces the write, forwards (reg, val) to MTC0() on psxRegs
 * and then calls gen_interupt() on psxRegs.CP0.
 * NOTE(review): this listing omits some source lines (braces, possibly
 * short statements); confirm the exact body against the full file.
 */
26 void pcsx_mtc0(u32 reg, u32 val)
28 evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
29 MTC0(&psxRegs, reg, val);
30 gen_interupt(&psxRegs.CP0);
32 //if (psxRegs.CP0.n.Cause & psxRegs.CP0.n.SR & 0x0300) // possible sw irq
/* Mask 0x803ffeff ignores bit 8 and bits 22-30 of the PC before comparing
 * it with 0x80000080; on a match pending_exception is raised. */
33 if ((psxRegs.pc & 0x803ffeff) == 0x80000080)
34 pending_exception = 1;
/*
 * Variant of pcsx_mtc0(): traces the write and forwards (reg, val) to
 * MTC0() on psxRegs. No gen_interupt() call or pending_exception update is
 * visible in this variant.
 * NOTE(review): presumably "_ds" means "delay slot" - confirm with callers.
 */
37 void pcsx_mtc0_ds(u32 reg, u32 val)
39 evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
40 MTC0(&psxRegs, reg, val);
/*
 * Rebuilds the event_cycles[] table from psxRegs and then calls
 * new_dyna_pcsx_mem_load_state(). Called from the load path of
 * new_dyna_freeze().
 * NOTE(review): the declaration of `i` is on a line not shown here.
 */
43 static void new_dyna_restore(void)
/* Each event's target cycle is its start cycle plus its duration. */
46 for (i = 0; i < PSXINT_COUNT; i++)
47 event_cycles[i] = psxRegs.intCycle[i].sCycle + psxRegs.intCycle[i].cycle;
/* The PSXINT_RCNT slot is overwritten from the psxNext* counters instead. */
49 event_cycles[PSXINT_RCNT] = psxNextsCounter + psxNextCounter;
/* Force the PSXINT_RCNT bit on, then drop any bits at or above PSXINT_COUNT. */
50 psxRegs.interrupt |= 1 << PSXINT_RCNT;
51 psxRegs.interrupt &= (1 << PSXINT_COUNT) - 1;
53 new_dyna_pcsx_mem_load_state();
/*
 * Saves or loads the dynarec block list through SaveFuncs on stream `f`.
 *
 * mode != 0 (save): collects block data into `addrs` with
 *   new_dynarec_save_blocks() and writes an 8-byte header ("ariblks"),
 *   the byte count `size`, and then `size` bytes of `addrs`.
 * mode == 0 (load): reads the header and size back, reads at most
 *   sizeof(addrs) bytes and passes them to new_dynarec_load_blocks()
 *   unless the interpreter core (psxInt) is the active CPU.
 *
 * NOTE(review): several lines (declarations of `header`, `bytes`, `size`,
 * early returns, the else branch opener) are not shown in this listing;
 * the description above covers only the visible statements.
 */
56 void new_dyna_freeze(void *f, int mode)
58 const char header_save[8] = "ariblks";
59 uint32_t addrs[1024 * 4];
64 if (mode != 0) { // save
65 size = new_dynarec_save_blocks(addrs, sizeof(addrs));
69 SaveFuncs.write(f, header_save, sizeof(header_save));
70 SaveFuncs.write(f, &size, sizeof(size));
71 SaveFuncs.write(f, addrs, size);
76 bytes = SaveFuncs.read(f, header, sizeof(header));
/* NOTE(review): `header` holds raw bytes read from the stream, so strcmp()
 * depends on a NUL terminator being present within it; a memcmp() over
 * sizeof(header_save) would not - confirm against header's declaration. */
77 if (bytes != sizeof(header) || strcmp(header, header_save)) {
/* Header mismatch: rewind by the number of bytes that were consumed. */
79 SaveFuncs.seek(f, -bytes, SEEK_CUR);
82 SaveFuncs.read(f, &size, sizeof(size));
/* Stored data larger than the local buffer: skip the excess bytes first. */
85 if (size > sizeof(addrs)) {
86 bytes = size - sizeof(addrs);
87 SaveFuncs.seek(f, bytes, SEEK_CUR);
90 bytes = SaveFuncs.read(f, addrs, size);
/* Block info is only handed to the dynarec when it is the active core. */
94 if (psxCpu != &psxInt)
95 new_dynarec_load_blocks(addrs, size);
98 //printf("drc: %d block info entries %s\n", size/8, mode ? "saved" : "loaded");
101 #if !defined(DRC_DISABLE) && !defined(LIGHTREC)
/* 64-slot handler table; ari64_init() fills it from psxCP2[], skipping
 * entries equal to gteNULL. */
104 void *gte_handlers[64];
/*
 * 64-slot table of "_nf" handler variants; NULL marks an unused slot.
 * The trailing comment on each row is the index of the row's first entry.
 * ari64_init() may overwrite individual entries at startup.
 * NOTE(review): "_nf" presumably means "no flags" (variants that skip the
 * flag computation) - confirm against the gte sources.
 */
106 void *gte_handlers_nf[64] = {
107 NULL , gteRTPS_nf , NULL , NULL , NULL , NULL , gteNCLIP_nf, NULL , // 00
108 NULL , NULL , NULL , NULL , gteOP_nf , NULL , NULL , NULL , // 08
109 gteDPCS_nf, gteINTPL_nf, gteMVMVA_nf, gteNCDS_nf, gteCDP_nf, NULL , gteNCDT_nf , NULL , // 10
110 NULL , NULL , NULL , gteNCCS_nf, gteCC_nf , NULL , gteNCS_nf , NULL , // 18
111 gteNCT_nf , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20
112 gteSQR_nf , gteDCPL_nf , gteDPCT_nf , NULL , NULL , gteAVSZ3_nf, gteAVSZ4_nf, NULL , // 28
113 gteRTPT_nf, NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30
114 NULL , NULL , NULL , NULL , NULL , gteGPF_nf , gteGPL_nf , gteNCCT_nf, // 38
/*
 * Name strings laid out in the same 64 slots as gte_handlers_nf[]: each
 * non-NULL entry names the handler at the same index.
 * NOTE(review): despite the identifier, the strings look like operation
 * mnemonics rather than register names - confirm intended use.
 */
117 const char *gte_regnames[64] = {
118 NULL , "RTPS" , NULL , NULL , NULL , NULL , "NCLIP", NULL , // 00
119 NULL , NULL , NULL , NULL , "OP" , NULL , NULL , NULL , // 08
120 "DPCS", "INTPL", "MVMVA", "NCDS", "CDP", NULL , "NCDT" , NULL , // 10
121 NULL , NULL , NULL , "NCCS", "CC" , NULL , "NCS" , NULL , // 18
122 "NCT" , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20
123 "SQR" , "DCPL" , "DPCT" , NULL , NULL , "AVSZ3", "AVSZ4", NULL , // 28
124 "RTPT", NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30
125 NULL , NULL , NULL , NULL , NULL , "GPF" , "GPL" , "NCCT", // 38
/*
 * Helpers that OR together the single-bit masks of several indices:
 * GCBITS3 combines three GCBIT() masks, GDBITSn combines n GDBIT() masks.
 * Each GDBITSn is built from GDBITS(n-1) plus one more GDBIT().
 * GCBIT()/GDBIT() themselves are defined on lines not shown in this listing.
 */
132 #define GCBITS3(b0,b1,b2) \
133 (GCBIT(b0) | GCBIT(b1) | GCBIT(b2))
134 #define GDBITS2(b0,b1) \
135 (GDBIT(b0) | GDBIT(b1))
136 #define GDBITS3(b0,b1,b2) \
137 (GDBITS2(b0,b1) | GDBIT(b2))
138 #define GDBITS4(b0,b1,b2,b3) \
139 (GDBITS3(b0,b1,b2) | GDBIT(b3))
140 #define GDBITS5(b0,b1,b2,b3,b4) \
141 (GDBITS4(b0,b1,b2,b3) | GDBIT(b4))
142 #define GDBITS6(b0,b1,b2,b3,b4,b5) \
143 (GDBITS5(b0,b1,b2,b3,b4) | GDBIT(b5))
144 #define GDBITS7(b0,b1,b2,b3,b4,b5,b6) \
145 (GDBITS6(b0,b1,b2,b3,b4,b5) | GDBIT(b6))
146 #define GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) \
147 (GDBITS7(b0,b1,b2,b3,b4,b5,b6) | GDBIT(b7))
148 #define GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) \
149 (GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) | GDBIT(b8))
150 #define GDBITS10(b0,b1,b2,b3,b4,b5,b6,b7,b8,b9) \
151 (GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) | GDBIT(b9))
/*
 * Per-operation 64-bit mask of the registers an operation reads, indexed
 * by the GTE_* operation constants. Slots without an initializer are zero.
 * Masks are built from GCBIT/GCBITS3 and GDBIT/GDBITSn terms plus literal
 * constants.
 * NOTE(review): the literals (e.g. 0x1f0000ff00000000ll) only set bits in
 * the upper 32 bits, so presumably control registers occupy the high word
 * and data registers the low word - confirm against GCBIT()/GDBIT().
 */
153 const uint64_t gte_reg_reads[64] = {
154 [GTE_RTPS] = 0x1f0000ff00000000ll | GDBITS7(0,1,13,14,17,18,19),
155 [GTE_NCLIP] = GDBITS3(12,13,14),
156 [GTE_OP] = GCBITS3(0,2,4) | GDBITS3(9,10,11),
157 [GTE_DPCS] = GCBITS3(21,22,23) | GDBITS4(6,8,21,22),
158 [GTE_INTPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22),
159 [GTE_MVMVA] = 0x00ffffff00000000ll | GDBITS9(0,1,2,3,4,5,9,10,11), // XXX: maybe decode further?
160 [GTE_NCDS] = 0x00ffff0000000000ll | GDBITS6(0,1,6,8,21,22),
161 [GTE_CDP] = 0x00ffe00000000000ll | GDBITS7(6,8,9,10,11,21,22),
162 [GTE_NCDT] = 0x00ffff0000000000ll | GDBITS8(0,1,2,3,4,5,6,8),
163 [GTE_NCCS] = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22),
164 [GTE_CC] = 0x001fe00000000000ll | GDBITS6(6,9,10,11,21,22),
165 [GTE_NCS] = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22),
166 [GTE_NCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
167 [GTE_SQR] = GDBITS3(9,10,11),
168 [GTE_DCPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22),
169 [GTE_DPCT] = GCBITS3(21,22,23) | GDBITS4(8,20,21,22),
170 [GTE_AVSZ3] = GCBIT(29) | GDBITS3(17,18,19),
171 [GTE_AVSZ4] = GCBIT(30) | GDBITS4(16,17,18,19),
172 [GTE_RTPT] = 0x1f0000ff00000000ll | GDBITS7(0,1,2,3,4,5,19),
173 [GTE_GPF] = GDBITS7(6,8,9,10,11,21,22),
174 [GTE_GPL] = GDBITS10(6,8,9,10,11,21,22,25,26,27),
175 [GTE_NCCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
/*
 * Per-operation 64-bit mask of the registers an operation writes, indexed
 * by the GTE_* operation constants (same layout as gte_reg_reads[]).
 * Slots without an initializer are zero. Every entry here is built only
 * from GDBIT terms or a literal that fits in the low 32 bits.
 */
178 // note: this excludes gteFLAG that is always written to
179 const uint64_t gte_reg_writes[64] = {
180 [GTE_RTPS] = 0x0f0f7f00ll,
181 [GTE_NCLIP] = GDBIT(24),
182 [GTE_OP] = GDBITS6(9,10,11,25,26,27),
183 [GTE_DPCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
184 [GTE_INTPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
185 [GTE_MVMVA] = GDBITS6(9,10,11,25,26,27),
186 [GTE_NCDS] = GDBITS9(9,10,11,20,21,22,25,26,27),
187 [GTE_CDP] = GDBITS9(9,10,11,20,21,22,25,26,27),
188 [GTE_NCDT] = GDBITS9(9,10,11,20,21,22,25,26,27),
189 [GTE_NCCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
190 [GTE_CC] = GDBITS9(9,10,11,20,21,22,25,26,27),
191 [GTE_NCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
192 [GTE_NCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
193 [GTE_SQR] = GDBITS6(9,10,11,25,26,27),
194 [GTE_DCPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
195 [GTE_DPCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
196 [GTE_AVSZ3] = GDBITS2(7,24),
197 [GTE_AVSZ4] = GDBITS2(7,24),
198 [GTE_RTPT] = 0x0f0f7f00ll,
199 [GTE_GPF] = GDBITS9(9,10,11,20,21,22,25,26,27),
200 [GTE_GPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
201 [GTE_NCCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
/*
 * One-time setup of the dynarec glue: initialises the memory layer, fills
 * the gte handler tables and publishes the zero-page and scratch buffers
 * through zeromem_ptr / scratch_buf_ptr.
 * NOTE(review): the declaration of `i`, several preprocessor lines
 * (#ifdef/#else/#endif around the ARM/NEON overrides and the memcpy) and
 * the return statement are not shown in this listing.
 */
204 static int ari64_init()
/* 128 u32 words, aligned to 64 bytes. */
206 static u32 scratch_buf[8*8*2] __attribute__((aligned(64)));
210 new_dyna_pcsx_mem_init();
/* Copy every psxCP2[] entry that is not the gteNULL placeholder. */
212 for (i = 0; i < ARRAY_SIZE(gte_handlers); i++)
213 if (psxCP2[i] != gteNULL)
214 gte_handlers[i] = psxCP2[i];
/* ARM builds without DRC_DBG replace selected slots with asm versions. */
216 #if defined(__arm__) && !defined(DRC_DBG)
217 gte_handlers[0x06] = gteNCLIP_arm;
219 gte_handlers_nf[0x01] = gteRTPS_nf_arm;
220 gte_handlers_nf[0x30] = gteRTPT_nf_arm;
223 // compiler's _nf version is still a lot slower than neon
224 // _nf_arm RTPS is roughly the same, RTPT slower
225 gte_handlers[0x01] = gte_handlers_nf[0x01] = gteRTPS_neon;
226 gte_handlers[0x30] = gte_handlers_nf[0x30] = gteRTPT_neon;
/* NOTE(review): this overwrites the whole _nf table with the regular
 * handlers; the condition guarding it is on a line not shown (presumably
 * a debug build) - confirm. */
230 memcpy(gte_handlers_nf, gte_handlers, sizeof(gte_handlers_nf));
233 zeromem_ptr = zero_mem;
234 scratch_buf_ptr = scratch_buf;
/*
 * Reset hook: resets the memory layer, invalidates all translated pages
 * and raises pending_exception.
 */
239 static void ari64_reset()
241 new_dyna_pcsx_mem_reset();
242 new_dynarec_invalidate_all_pages();
244 pending_exception = 1;
/*
 * Runs one dynarec session: schedules the next timeslice, enters
 * new_dyna_start() with dynarec_local and traces PC/cycle state before and
 * after the call.
 */
247 // execute until predefined leave points
248 // (HLE softcall exit and BIOS fastboot end)
249 static void ari64_execute_until()
251 schedule_timeslice();
253 evprintf("ari64_execute %08x, %u->%u (%d)\n", psxRegs.pc,
254 psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle);
256 new_dyna_start(dynarec_local);
258 evprintf("ari64_execute end %08x, %u->%u (%d)\n", psxRegs.pc,
259 psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle);
/*
 * Main execution entry: calls ari64_execute_until() and traces the PC at
 * which the dynarec returned.
 * NOTE(review): the surrounding control flow (presumably a loop with an
 * exit condition) is on lines not shown in this listing - confirm.
 */
262 static void ari64_execute()
265 ari64_execute_until();
266 evprintf("drc left @%08x\n", psxRegs.pc);
/*
 * Runs a single ari64_execute_until() session on behalf of `caller`.
 * For EXEC_CALLER_BOOT extra work is done before and after the run.
 * NOTE(review): the bodies of both `if` statements are on lines not shown
 * in this listing, so what they do cannot be stated here.
 */
270 static void ari64_execute_block(enum blockExecCaller caller)
272 if (caller == EXEC_CALLER_BOOT)
275 ari64_execute_until();
277 if (caller == EXEC_CALLER_BOOT)
/*
 * Invalidates translated code for a memory range.
 * addr: start address; size: length in 32-bit words (converted to bytes
 * below). The range handed to new_dynarec_invalidate_range() is
 * (addr, addr + size_in_bytes).
 */
281 static void ari64_clear(u32 addr, u32 size)
283 size *= 4; /* PCSX uses DMA units (words) */
285 evprintf("ari64_clear %08x %04x\n", addr, size);
287 new_dynarec_invalidate_range(addr, addr + size);
/*
 * Notification hook dispatching on `note`.
 * Cache (un)isolation notes are forwarded to new_dyna_pcsx_mem_isolate()
 * with 1 for ISOLATED and 0 for UNISOLATED.
 * NOTE(review): the switch header, break statements and the bodies of the
 * BEFORE_SAVE / AFTER_LOAD cases are on lines not shown in this listing;
 * `data` is not used by any visible line.
 */
290 static void ari64_notify(enum R3000Anote note, void *data) {
293 case R3000ACPU_NOTIFY_CACHE_UNISOLATED:
294 case R3000ACPU_NOTIFY_CACHE_ISOLATED:
295 new_dyna_pcsx_mem_isolate(note == R3000ACPU_NOTIFY_CACHE_ISOLATED);
297 case R3000ACPU_NOTIFY_BEFORE_SAVE:
299 case R3000ACPU_NOTIFY_AFTER_LOAD:
/*
 * Syncs dynarec settings with Config and flushes translated code when a
 * setting that affects code generation has changed.
 * NOTE(review): the `else` between the two NDHACK_NO_STALLS updates and
 * any preceding statements are on lines not shown in this listing.
 */
305 static void ari64_apply_config()
/* Mirror Config.DisableStalls into the NDHACK_NO_STALLS hack bit. */
309 if (Config.DisableStalls)
310 new_dynarec_hacks |= NDHACK_NO_STALLS;
312 new_dynarec_hacks &= ~NDHACK_NO_STALLS;
/* Compare against the *_old values and clear everything on a change. */
314 if (Config.cycle_multiplier != cycle_multiplier_old
315 || new_dynarec_hacks != new_dynarec_hacks_old)
317 new_dynarec_clear_full();
/* Shutdown hook: tears down the dynarec, then the memory layer. */
321 static void ari64_shutdown()
323 new_dynarec_cleanup();
324 new_dyna_pcsx_mem_shutdown();
338 #else // if DRC_DISABLE
/*
 * Definitions of the globals used elsewhere in this file, for builds that
 * take the #else branch of the DRC guard (dynarec compiled out).
 * NOTE(review): a few declarations in this run are on lines not shown in
 * this listing.
 */
340 unsigned int address;
341 int pending_exception, stop;
343 int new_dynarec_did_compile;
344 int cycle_multiplier_old;
345 int new_dynarec_hacks_pergame;
346 int new_dynarec_hacks_old;
347 int new_dynarec_hacks;
/* 0x1000 bytes (4 KiB) of u32 words; ari64_init() stores its address in
 * zeromem_ptr. */
350 u32 zero_mem[0x1000/4];
352 void *scratch_buf_ptr;
/*
 * No-op stand-ins for the dynarec entry points, used when the dynarec is
 * compiled out. They do nothing; new_dynarec_save_blocks() reports that
 * zero bytes were saved.
 * Parameterless functions use (void) so the definitions are real
 * prototypes, matching the new_dyna_pcsx_mem_* stubs below.
 */
void new_dynarec_init(void) {}
void new_dyna_start(void *context) {}
void new_dynarec_cleanup(void) {}
void new_dynarec_clear_full(void) {}
void new_dynarec_invalidate_all_pages(void) {}
void new_dynarec_invalidate_range(unsigned int start, unsigned int end) {}
void new_dyna_pcsx_mem_init(void) {}
void new_dyna_pcsx_mem_reset(void) {}
void new_dyna_pcsx_mem_load_state(void) {}
void new_dyna_pcsx_mem_isolate(int enable) {}
void new_dyna_pcsx_mem_shutdown(void) {}
int new_dynarec_save_blocks(void *save, int size) { return 0; }
void new_dynarec_load_blocks(const void *save, int size) {}
/*
 * Debug helper: writes `size` bytes starting at `mem` to the file `fname`.
 *
 * If `fname` cannot be opened for writing, the file name after the last
 * '/' is tried instead (i.e. the file is created in the current
 * directory). If neither can be opened, a message is printed and nothing
 * is written.
 */
void dump_mem(const char *fname, void *mem, size_t size)
{
	FILE *f1 = fopen(fname, "wb");

	if (f1 == NULL) {
		/* strrchr() returns NULL when there is no '/' to strip. */
		const char *slash = strrchr(fname, '/');

		if (slash != NULL)
			f1 = fopen(slash + 1, "wb");
	}
	if (f1 == NULL) {
		printf("dump_mem: can't open %s\n", fname);
		return;
	}

	fwrite(mem, 1, size, f1);
	fclose(f1);
}
/*
 * Reads the 32-bit word at address `a` for the trace/compare helpers.
 * The upper 16 bits of `a` select the backing buffer:
 *   0x1f80 -> psxH, 0x1f00 -> psxP, anything else -> psxM.
 * The offset is rounded down to a word boundary by the mask (low two bits
 * cleared) and limited to 64 KiB for psxH/psxP and 2 MiB for psxM.
 */
385 static u32 memcheck_read(u32 a)
387 if ((a >> 16) == 0x1f80)
389 return *(u32 *)(psxH + (a & 0xfffc));
391 if ((a >> 16) == 0x1f00)
393 return *(u32 *)(psxP + (a & 0xfffc));
395 // if ((a & ~0xe0600000) < 0x200000)
/* Fallback: every other address is treated as an offset into psxM. */
397 return *(u32 *)(psxM + (a & 0x1ffffc));
/*
 * Debug tracer: records to the file "tracelog" every value that changed
 * since the previous call. It covers the 32-bit words of psxRegs that
 * precede the intCycle member, the event_cycles[] entries, and
 * irq_test_cycle, handler_cycle, last_io_addr and the word at
 * last_io_addr.
 * Snapshots of the previous values are kept in function-local statics.
 * NOTE(review): the lines that assign the tag `byte` written before each
 * value, the fopen guard and several closing braces are not shown in this
 * listing; the record format must be confirmed against the full file and
 * against do_insn_cmp().
 */
401 void do_insn_trace(void)
403 static psxRegisters oldregs;
404 static u32 event_cycles_o[PSXINT_COUNT];
/* View both register files as flat arrays of 32-bit words. */
405 u32 *allregs_p = (void *)&psxRegs;
406 u32 *allregs_o = (void *)&oldregs;
411 //last_io_addr = 0x5e2c8;
413 f = fopen("tracelog", "wb");
/* Keep `code` in sync so it never shows up as a difference. */
416 oldregs.code = psxRegs.code; // don't care
417 for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
418 if (allregs_p[i] != allregs_o[i]) {
420 fwrite(&allregs_p[i], 1, 4, f);
421 allregs_o[i] = allregs_p[i];
425 for (i = 0; i < PSXINT_COUNT; i++) {
426 if (event_cycles[i] != event_cycles_o[i]) {
428 fwrite(&byte, 1, 1, f);
430 fwrite(&event_cycles[i], 1, 4, f);
431 event_cycles_o[i] = event_cycles[i];
/* SAVE_IF_CHANGED(code_, name_): keeps a static copy of name_ (seeded with
 * 0xbad0c0de) and writes a tag byte plus the 4-byte value when it changes. */
434 #define SAVE_IF_CHANGED(code_, name_) { \
435 static u32 old_##name_ = 0xbad0c0de; \
436 if (old_##name_ != name_) { \
438 fwrite(&byte, 1, 1, f); \
439 fwrite(&name_, 1, 4, f); \
440 old_##name_ = name_; \
443 SAVE_IF_CHANGED(0xfb, irq_test_cycle);
444 SAVE_IF_CHANGED(0xfc, handler_cycle);
445 SAVE_IF_CHANGED(0xfd, last_io_addr);
/* Sample the word at the last I/O address so memory effects are traced. */
446 io_data = memcheck_read(last_io_addr);
447 SAVE_IF_CHANGED(0xfe, io_data);
449 fwrite(&byte, 1, 1, f);
/* Developer-only hook: dumps psxM and psxH at one hard-coded cycle count
 * to hard-coded paths. */
452 if (psxRegs.cycle == 190230) {
453 dump_mem("/mnt/ntz/dev/pnd/tmp/psxram_i.dump", psxM, 0x200000);
454 dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs_i.dump", psxH, 0x10000);
/*
 * Printable names for the 32-bit words of psxRegisters that precede the
 * intCycle member, in word order; do_insn_cmp() indexes it with the word
 * index of a mismatching register.
 * NOTE(review): this listing is missing at least one source line between
 * the r* rows and the C0_* rows, so the visible rows alone do not define
 * the full index-to-name mapping.
 */
462 static const char *regnames[offsetof(psxRegisters, intCycle) / 4] = {
463 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
464 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
465 "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
466 "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
468 "C0_0", "C0_1", "C0_2", "C0_3", "C0_4", "C0_5", "C0_6", "C0_7",
469 "C0_8", "C0_9", "C0_10", "C0_11", "C0_12", "C0_13", "C0_14", "C0_15",
470 "C0_16", "C0_17", "C0_18", "C0_19", "C0_20", "C0_21", "C0_22", "C0_23",
471 "C0_24", "C0_25", "C0_26", "C0_27", "C0_28", "C0_29", "C0_30", "C0_31",
473 "C2D0", "C2D1", "C2D2", "C2D3", "C2D4", "C2D5", "C2D6", "C2D7",
474 "C2D8", "C2D9", "C2D10", "C2D11", "C2D12", "C2D13", "C2D14", "C2D15",
475 "C2D16", "C2D17", "C2D18", "C2D19", "C2D20", "C2D21", "C2D22", "C2D23",
476 "C2D24", "C2D25", "C2D26", "C2D27", "C2D28", "C2D29", "C2D30", "C2D31",
478 "C2C0", "C2C1", "C2C2", "C2C3", "C2C4", "C2C5", "C2C6", "C2C7",
479 "C2C8", "C2C9", "C2C10", "C2C11", "C2C12", "C2C13", "C2C14", "C2C15",
480 "C2C16", "C2C17", "C2C18", "C2C19", "C2C20", "C2C21", "C2C22", "C2C23",
481 "C2C24", "C2C25", "C2C26", "C2C27", "C2C28", "C2C29", "C2C30", "C2C31",
483 "PC", "code", "cycle", "interrupt",
/*
 * Write cursor and size helpers for the miss_log[] ring buffer (the array
 * itself is declared on lines not shown in this listing).
 * miss_log_mask is length - 1 and is used with `&` to wrap the cursor, so
 * the array length has to be a power of two for the wrap to be correct.
 */
491 static int miss_log_i;
492 #define miss_log_len (sizeof(miss_log)/sizeof(miss_log[0]))
493 #define miss_log_mask (miss_log_len-1)
/*
 * Stores one register mismatch in miss_log[] at the current cursor and
 * advances the cursor, wrapping with miss_log_mask (the oldest entry is
 * overwritten once the buffer is full).
 * reg: word index of the register; val: observed value; val_expect:
 * reference value; pc, cycle: where the mismatch was seen.
 */
495 static void miss_log_add(int reg, u32 val, u32 val_expect, u32 pc, u32 cycle)
497 miss_log[miss_log_i].reg = reg;
498 miss_log[miss_log_i].val = val;
499 miss_log[miss_log_i].val_expect = val_expect;
500 miss_log[miss_log_i].pc = pc;
501 miss_log[miss_log_i].cycle = cycle;
502 miss_log_i = (miss_log_i + 1) & miss_log_mask;
/*
 * Debug comparer: replays the "tracelog" file written by do_insn_trace()
 * and checks the live state against it. It compares psxRegs (words before
 * intCycle), one event_cycles[] entry, irq_test_cycle, handler_cycle and
 * one memory word, printing diagnostics on mismatch.
 * The reference state accumulates across calls in function-local statics.
 * NOTE(review): many lines are not shown in this listing (the record
 * loop, switch/case labels, `fatal`/`bad` updates, returns and the final
 * exit path); the comments below describe only the visible statements.
 */
507 void do_insn_cmp(void)
509 extern int last_count;
510 static psxRegisters rregs;
511 static u32 mem_addr, mem_val;
512 static u32 irq_test_cycle_intr;
513 static u32 handler_cycle_intr;
/* Flat 32-bit views of the live (p) and expected (e) register files. */
514 u32 *allregs_p = (void *)&psxRegs;
515 u32 *allregs_e = (void *)&rregs;
516 u32 badregs_mask = 0;
517 static u32 ppc, failcount;
518 static u32 badregs_mask_prev;
519 int i, ret, bad = 0, fatal = 0, which_event = -1;
524 f = fopen("tracelog", "rb");
/* Each record starts with a one-byte code selecting what follows. */
527 if ((ret = fread(&code, 1, 1, f)) <= 0)
/* NOTE(review): only one byte is read into the int which_event (initialised
 * to -1 above); unless it is zeroed on a line not shown, the remaining
 * bytes keep their previous value - confirm. */
536 fread(&which_event, 1, 1, f);
537 fread(&ev_cycles, 1, 4, f);
540 fread(&irq_test_cycle_intr, 1, 4, f);
543 fread(&handler_cycle_intr, 1, 4, f);
546 fread(&mem_addr, 1, 4, f);
549 fread(&mem_val, 1, 4, f);
/* Any other code is a register word index; it must be inside the range
 * of words covered by the trace. */
552 assert(code < offsetof(psxRegisters, intCycle) / 4);
553 fread(&allregs_e[code], 1, 4, f);
/* Fields excluded from comparison are copied from the reference. */
561 psxRegs.code = rregs.code; // don't care
562 psxRegs.cycle += last_count;
563 //psxRegs.cycle = rregs.cycle; // needs reload in _cmp
564 psxRegs.CP0.r[9] = rregs.CP0.r[9]; // Count
566 //if (psxRegs.cycle == 166172) breakme();
568 if (which_event >= 0 && event_cycles[which_event] != ev_cycles) {
569 printf("bad ev_cycles #%d: %u %u / %u\n", which_event,
570 event_cycles[which_event], ev_cycles, psxRegs.cycle);
/* Unlike the other checks this one uses `>`, so a live value at or below
 * the reference is accepted. */
574 if (irq_test_cycle > irq_test_cycle_intr) {
575 printf("bad irq_test_cycle: %u %u\n", irq_test_cycle, irq_test_cycle_intr);
579 if (handler_cycle != handler_cycle_intr) {
580 printf("bad handler_cycle: %u %u\n", handler_cycle, handler_cycle_intr);
584 if (mem_val != memcheck_read(mem_addr)) {
585 printf("bad mem @%08x: %08x %08x\n", mem_addr, memcheck_read(mem_addr), mem_val);
/* Condition for "no fatal mismatch and all compared register words match". */
589 if (!fatal && !memcmp(&psxRegs, &rregs, offsetof(psxRegisters, intCycle))) {
/* Slow path: log every differing word. */
594 for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
595 if (allregs_p[i] != allregs_e[i]) {
596 miss_log_add(i, allregs_p[i], allregs_e[i], psxRegs.pc, psxRegs.cycle);
/* NOTE(review): 1u << i is only defined for i < 32; any guard on i is on
 * lines not shown - confirm. */
601 badregs_mask |= 1u << i;
605 if (badregs_mask_prev & badregs_mask)
/* Condition under which a mismatch is treated as tolerable: same PC, few
 * bad words, limited failure count. */
610 if (!fatal && psxRegs.pc == rregs.pc && bad < 6 && failcount < 24) {
611 static int last_mcycle;
/* Progress print whenever cycle >> 20 changes. */
612 if (last_mcycle != psxRegs.cycle >> 20) {
613 printf("%u\n", psxRegs.cycle);
614 last_mcycle = psxRegs.cycle >> 20;
/* Failure report: dump the whole miss log starting at the current cursor
 * (oldest entry first once the ring has wrapped), then the GPRs and PC. */
619 for (i = 0; i < miss_log_len; i++, miss_log_i = (miss_log_i + 1) & miss_log_mask)
620 printf("bad %5s: %08x %08x, pc=%08x, cycle %u\n",
621 regnames[miss_log[miss_log_i].reg], miss_log[miss_log_i].val,
622 miss_log[miss_log_i].val_expect, miss_log[miss_log_i].pc, miss_log[miss_log_i].cycle);
623 printf("-- %d\n", bad);
624 for (i = 0; i < 8; i++)
625 printf("r%d=%08x r%2d=%08x r%2d=%08x r%2d=%08x\n", i, allregs_p[i],
626 i+8, allregs_p[i+8], i+16, allregs_p[i+16], i+24, allregs_p[i+24]);
627 printf("PC: %08x/%08x, cycle %u, next %u\n", psxRegs.pc, ppc, psxRegs.cycle, next_interupt);
628 //dump_mem("/tmp/psxram.dump", psxM, 0x200000);
629 //dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs.dump", psxH, 0x10000);
632 //psxRegs.cycle = rregs.cycle + 2; // sync timing
/* Remember this call's mismatch set for the repeat check on the next call. */
634 badregs_mask_prev = badregs_mask;