2 * (C) Gražvydas "notaz" Ignotas, 2010-2011
4 * This work is licensed under the terms of GNU GPL version 2 or later.
5 * See the COPYING file in the top-level directory.
13 #include "../psxhle.h"
14 #include "../psxinterpreter.h"
15 #include "../r3000a.h"
16 #include "../gte_arm.h"
17 #include "../gte_neon.h"
/*
 * Number of elements in a true array (NOT a pointer or an array parameter).
 * The argument is parenthesized before indexing so that any array-valued
 * expression can be passed safely.
 */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
23 //#define evprintf printf
/*
 * MTC0 hook: logs the write through evprintf, forwards reg/val unchanged
 * to MTC0() on psxRegs and then calls gen_interupt() on psxRegs.CP0.
 * Afterwards pending_exception is raised if pc matches the pattern below.
 */
26 void pcsx_mtc0(u32 reg, u32 val)
28 evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
29 MTC0(&psxRegs, reg, val);
30 gen_interupt(&psxRegs.CP0);
32 //if (psxRegs.CP0.n.Cause & psxRegs.CP0.n.SR & 0x0300) // possible sw irq
// the mask ignores bit 8 and bits 22-30 of pc, so every pc that equals
// 0x80000080 in the remaining bits sets pending_exception
33 if ((psxRegs.pc & 0x803ffeff) == 0x80000080)
34 pending_exception = 1;
/*
 * Reduced MTC0 hook: only logs through evprintf and forwards reg/val to
 * MTC0() on psxRegs. The visible lines contain no gen_interupt() call and
 * no pending_exception update.
 * NOTE(review): the _ds suffix presumably stands for "delay slot" - confirm.
 */
37 void pcsx_mtc0_ds(u32 reg, u32 val)
39 evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
40 MTC0(&psxRegs, reg, val);
/*
 * Rebuilds the dynarec-side event state from psxRegs:
 * every event_cycles[] entry becomes sCycle + cycle of the matching
 * psxRegs.intCycle[] entry, the PSXINT_RCNT slot is then overridden from
 * the root counter variables, and finally new_dyna_pcsx_mem_load_state()
 * is called.
 */
43 static void new_dyna_restore(void)
46 for (i = 0; i < PSXINT_COUNT; i++)
47 event_cycles[i] = psxRegs.intCycle[i].sCycle + psxRegs.intCycle[i].cycle;
// the PSXINT_RCNT deadline comes from psxNextsCounter/psxNextCounter, and
// its bit is always set in psxRegs.interrupt
49 event_cycles[PSXINT_RCNT] = psxNextsCounter + psxNextCounter;
50 psxRegs.interrupt |= 1 << PSXINT_RCNT;
// drop any bits at or above PSXINT_COUNT
51 psxRegs.interrupt &= (1 << PSXINT_COUNT) - 1;
53 new_dyna_pcsx_mem_load_state();
/*
 * Saves or loads the dynarec block list through SaveFuncs on handle f.
 *
 * mode != 0: save. The list is obtained from new_dynarec_save_blocks() and
 *            written as: 8-byte header "ariblks", the size value, then
 *            size bytes of addrs.
 * mode == 0: load. The header is read and compared; then the size and the
 *            list are read, and the list is handed to
 *            new_dynarec_load_blocks() unless psxCpu is the interpreter.
 *
 * NOTE(review): the declarations of size/bytes/header and several short
 * statements between the lines below are not visible in this excerpt.
 */
56 void new_dyna_freeze(void *f, int mode)
58 const char header_save[8] = "ariblks";
// staging buffer for the block list; caps what is saved or loaded
59 uint32_t addrs[1024 * 4];
64 if (mode != 0) { // save
65 size = new_dynarec_save_blocks(addrs, sizeof(addrs));
69 SaveFuncs.write(f, header_save, sizeof(header_save));
70 SaveFuncs.write(f, &size, sizeof(size));
71 SaveFuncs.write(f, addrs, size);
76 bytes = SaveFuncs.read(f, header, sizeof(header));
// short read or different header: seek back over what was consumed
77 if (bytes != sizeof(header) || strcmp(header, header_save)) {
79 SaveFuncs.seek(f, -bytes, SEEK_CUR);
// NOTE(review): the result of this read is not checked
82 SaveFuncs.read(f, &size, sizeof(size));
// stored list larger than addrs: seek forward by the excess
// (size - sizeof(addrs)) before the read below
85 if (size > sizeof(addrs)) {
86 bytes = size - sizeof(addrs);
87 SaveFuncs.seek(f, bytes, SEEK_CUR);
90 bytes = SaveFuncs.read(f, addrs, size);
// the list is only applied when the active CPU core is not psxInt
94 if (psxCpu != &psxInt)
95 new_dynarec_load_blocks(addrs, size);
98 //printf("drc: %d block info entries %s\n", size/8, mode ? "saved" : "loaded");
101 #if !defined(DRC_DISABLE) && !defined(LIGHTREC)
/*
 * GTE handler tables, 64 slots each.
 * gte_handlers starts out empty and is filled at init time.
 * gte_handlers_nf lists the handlers carrying the _nf suffix, 8 entries per
 * row; the trailing comment on each row is the hex index of its first
 * entry. Slots without an _nf handler are NULL.
 * NOTE(review): _nf presumably means "no flags" - confirm against the
 * handler definitions.
 */
104 void *gte_handlers[64];
106 void *gte_handlers_nf[64] = {
107 NULL , gteRTPS_nf , NULL , NULL , NULL , NULL , gteNCLIP_nf, NULL , // 00
108 NULL , NULL , NULL , NULL , gteOP_nf , NULL , NULL , NULL , // 08
109 gteDPCS_nf, gteINTPL_nf, gteMVMVA_nf, gteNCDS_nf, gteCDP_nf, NULL , gteNCDT_nf , NULL , // 10
110 NULL , NULL , NULL , gteNCCS_nf, gteCC_nf , NULL , gteNCS_nf , NULL , // 18
111 gteNCT_nf , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20
112 gteSQR_nf , gteDCPL_nf , gteDPCT_nf , NULL , NULL , gteAVSZ3_nf, gteAVSZ4_nf, NULL , // 28
113 gteRTPT_nf, NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30
114 NULL , NULL , NULL , NULL , NULL , gteGPF_nf , gteGPL_nf , gteNCCT_nf, // 38
/*
 * Mnemonic strings for the 64 GTE slots, 8 entries per row; the trailing
 * comment on each row is the hex index of its first entry. Unnamed slots
 * are NULL. Despite the array name, the entries are operation mnemonics
 * ("RTPS", "NCLIP", ...).
 */
117 const char *gte_regnames[64] = {
118 NULL , "RTPS" , NULL , NULL , NULL , NULL , "NCLIP", NULL , // 00
119 NULL , NULL , NULL , NULL , "OP" , NULL , NULL , NULL , // 08
120 "DPCS", "INTPL", "MVMVA", "NCDS", "CDP", NULL , "NCDT" , NULL , // 10
121 NULL , NULL , NULL , "NCCS", "CC" , NULL , "NCS" , NULL , // 18
122 "NCT" , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20
123 "SQR" , "DCPL" , "DPCT" , NULL , NULL , "AVSZ3", "AVSZ4", NULL , // 28
124 "RTPT", NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30
125 NULL , NULL , NULL , NULL , NULL , "GPF" , "GPL" , "NCCT", // 38
/*
 * Helpers that OR together several single-bit masks.
 * GCBITS3 combines three GCBIT() masks; GDBITSn combines n GDBIT() masks,
 * each one defined as GDBITS(n-1) plus one more GDBIT().
 * GCBIT and GDBIT themselves are defined outside this excerpt.
 */
132 #define GCBITS3(b0,b1,b2) \
133 (GCBIT(b0) | GCBIT(b1) | GCBIT(b2))
134 #define GDBITS2(b0,b1) \
135 (GDBIT(b0) | GDBIT(b1))
136 #define GDBITS3(b0,b1,b2) \
137 (GDBITS2(b0,b1) | GDBIT(b2))
138 #define GDBITS4(b0,b1,b2,b3) \
139 (GDBITS3(b0,b1,b2) | GDBIT(b3))
140 #define GDBITS5(b0,b1,b2,b3,b4) \
141 (GDBITS4(b0,b1,b2,b3) | GDBIT(b4))
142 #define GDBITS6(b0,b1,b2,b3,b4,b5) \
143 (GDBITS5(b0,b1,b2,b3,b4) | GDBIT(b5))
144 #define GDBITS7(b0,b1,b2,b3,b4,b5,b6) \
145 (GDBITS6(b0,b1,b2,b3,b4,b5) | GDBIT(b6))
146 #define GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) \
147 (GDBITS7(b0,b1,b2,b3,b4,b5,b6) | GDBIT(b7))
148 #define GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) \
149 (GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) | GDBIT(b8))
150 #define GDBITS10(b0,b1,b2,b3,b4,b5,b6,b7,b8,b9) \
151 (GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) | GDBIT(b9))
/*
 * Per-operation 64-bit masks, indexed by the GTE_* operation constants
 * (slots not listed stay 0). Each mask is built from GDBITSn()/GCBITS3()/
 * GCBIT() helpers, in places OR-ed with a literal constant.
 * NOTE(review): judging by the name this is the set of GTE registers each
 * operation reads, with GDBIT(n)/GCBIT(n) presumably selecting data/control
 * register n - confirm against the GDBIT/GCBIT definitions, which are not
 * in this excerpt.
 */
153 const uint64_t gte_reg_reads[64] = {
154 [GTE_RTPS] = 0x1f0000ff00000000ll | GDBITS7(0,1,13,14,17,18,19),
155 [GTE_NCLIP] = GDBITS3(12,13,14),
156 [GTE_OP] = GCBITS3(0,2,4) | GDBITS3(9,10,11),
157 [GTE_DPCS] = GCBITS3(21,22,23) | GDBITS4(6,8,21,22),
158 [GTE_INTPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22),
159 [GTE_MVMVA] = 0x00ffffff00000000ll | GDBITS9(0,1,2,3,4,5,9,10,11), // XXX: maybe decode further?
160 [GTE_NCDS] = 0x00ffff0000000000ll | GDBITS6(0,1,6,8,21,22),
161 [GTE_CDP] = 0x00ffe00000000000ll | GDBITS7(6,8,9,10,11,21,22),
162 [GTE_NCDT] = 0x00ffff0000000000ll | GDBITS8(0,1,2,3,4,5,6,8),
163 [GTE_NCCS] = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22),
164 [GTE_CC] = 0x001fe00000000000ll | GDBITS6(6,9,10,11,21,22),
165 [GTE_NCS] = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22),
166 [GTE_NCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
167 [GTE_SQR] = GDBITS3(9,10,11),
168 [GTE_DCPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22),
169 [GTE_DPCT] = GCBITS3(21,22,23) | GDBITS4(8,20,21,22),
170 [GTE_AVSZ3] = GCBIT(29) | GDBITS3(17,18,19),
171 [GTE_AVSZ4] = GCBIT(30) | GDBITS4(16,17,18,19),
172 [GTE_RTPT] = 0x1f0000ff00000000ll | GDBITS7(0,1,2,3,4,5,19),
173 [GTE_GPF] = GDBITS7(6,8,9,10,11,21,22),
174 [GTE_GPL] = GDBITS10(6,8,9,10,11,21,22,25,26,27),
175 [GTE_NCCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
/*
 * Per-operation 64-bit masks, indexed by the GTE_* operation constants
 * (slots not listed stay 0). Only GDBIT-based masks and two literal
 * constants appear here; no GCBIT mask is used.
 * NOTE(review): judging by the name this is the set of GTE registers each
 * operation writes - confirm against the GDBIT definition, which is not in
 * this excerpt.
 */
178 // note: this excludes gteFLAG that is always written to
179 const uint64_t gte_reg_writes[64] = {
180 [GTE_RTPS] = 0x0f0f7f00ll,
181 [GTE_NCLIP] = GDBIT(24),
182 [GTE_OP] = GDBITS6(9,10,11,25,26,27),
183 [GTE_DPCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
184 [GTE_INTPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
185 [GTE_MVMVA] = GDBITS6(9,10,11,25,26,27),
186 [GTE_NCDS] = GDBITS9(9,10,11,20,21,22,25,26,27),
187 [GTE_CDP] = GDBITS9(9,10,11,20,21,22,25,26,27),
188 [GTE_NCDT] = GDBITS9(9,10,11,20,21,22,25,26,27),
189 [GTE_NCCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
190 [GTE_CC] = GDBITS9(9,10,11,20,21,22,25,26,27),
191 [GTE_NCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
192 [GTE_NCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
193 [GTE_SQR] = GDBITS6(9,10,11,25,26,27),
194 [GTE_DCPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
195 [GTE_DPCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
196 [GTE_AVSZ3] = GDBITS2(7,24),
197 [GTE_AVSZ4] = GDBITS2(7,24),
198 [GTE_RTPT] = 0x0f0f7f00ll,
199 [GTE_GPF] = GDBITS9(9,10,11,20,21,22,25,26,27),
200 [GTE_GPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
201 [GTE_NCCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
/*
 * Initializes the dynarec glue: sets up the memory layer, fills the GTE
 * handler tables and publishes the zero_mem and scratch buffer pointers.
 * NOTE(review): several preprocessor lines and the return statement of this
 * function are not visible in this excerpt, so which of the handler
 * overrides below are compiled together cannot be told from here.
 */
204 static int ari64_init()
// 8*8*2 u32 words of static storage, aligned to 64 bytes
206 static u32 scratch_buf[8*8*2] __attribute__((aligned(64)));
210 new_dyna_pcsx_mem_init();
// take over every psxCP2[] entry that is not the gteNULL placeholder
212 for (i = 0; i < ARRAY_SIZE(gte_handlers); i++)
213 if (psxCP2[i] != gteNULL)
214 gte_handlers[i] = psxCP2[i];
// ARM builds without DRC_DBG: replace selected slots (0x06, 0x01, 0x30)
// with the handlers named *_arm / *_neon
216 #if defined(__arm__) && !defined(DRC_DBG)
217 gte_handlers[0x06] = gteNCLIP_arm;
219 gte_handlers_nf[0x01] = gteRTPS_nf_arm;
220 gte_handlers_nf[0x30] = gteRTPT_nf_arm;
223 // compiler's _nf version is still a lot slower than neon
224 // _nf_arm RTPS is roughly the same, RTPT slower
225 gte_handlers[0x01] = gte_handlers_nf[0x01] = gteRTPS_neon;
226 gte_handlers[0x30] = gte_handlers_nf[0x30] = gteRTPT_neon;
// overwrites the whole _nf table with the regular handlers
230 memcpy(gte_handlers_nf, gte_handlers, sizeof(gte_handlers_nf));
233 zeromem_ptr = zero_mem;
234 scratch_buf_ptr = scratch_buf;
/*
 * Reset hook: resets the memory layer, invalidates all dynarec pages and
 * raises pending_exception.
 */
239 static void ari64_reset()
241 new_dyna_pcsx_mem_reset();
242 new_dynarec_invalidate_all_pages();
244 pending_exception = 1;
/*
 * Runs the dynarec once via new_dyna_start(dynarec_local), logging pc,
 * psxRegs.cycle and next_interupt through evprintf before and after.
 */
247 // execute until predefined leave points
248 // (HLE softcall exit and BIOS fastboot end)
249 static void ari64_execute_until()
251 evprintf("ari64_execute %08x, %u->%u (%d)\n", psxRegs.pc,
252 psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle);
254 new_dyna_start(dynarec_local);
256 evprintf("ari64_execute end %08x, %u->%u (%d)\n", psxRegs.pc,
257 psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle);
/*
 * Execute hook: calls schedule_timeslice(), runs ari64_execute_until() and
 * logs the pc at which the dynarec was left.
 * NOTE(review): the gap in the embedded line numbers suggests an enclosing
 * loop that is not visible in this excerpt - confirm.
 */
260 static void ari64_execute()
263 schedule_timeslice();
264 ari64_execute_until();
265 evprintf("drc left @%08x\n", psxRegs.pc);
/*
 * Block-execute hook: sets next_interupt one cycle past the current
 * psxRegs.cycle and then runs ari64_execute_until().
 * caller is checked against EXEC_CALLER_BOOT before and after the run.
 * NOTE(review): the statements guarded by those two checks are not visible
 * in this excerpt.
 */
269 static void ari64_execute_block(enum blockExecCaller caller)
271 if (caller == EXEC_CALLER_BOOT)
274 next_interupt = psxRegs.cycle + 1;
275 ari64_execute_until();
277 if (caller == EXEC_CALLER_BOOT)
/*
 * Invalidates dynarec code for a memory range.
 * addr: start address; size: length in 32-bit words, converted to bytes
 * before the range [addr, addr + size) is passed to
 * new_dynarec_invalidate_range().
 */
281 static void ari64_clear(u32 addr, u32 size)
283 size *= 4; /* PCSX uses DMA units (words) */
285 evprintf("ari64_clear %08x %04x\n", addr, size);
287 new_dynarec_invalidate_range(addr, addr + size);
/*
 * Notification hook dispatching on note.
 * For the two cache notes, new_dyna_pcsx_mem_isolate() is called with 1 for
 * ISOLATED and 0 for UNISOLATED. The note and data are also passed on to
 * psxInt.Notify().
 * NOTE(review): the switch header, the bodies of the BEFORE_SAVE /
 * AFTER_LOAD cases and any break statements are not visible in this
 * excerpt, so which notes reach psxInt.Notify() cannot be told from here.
 */
290 static void ari64_notify(enum R3000Anote note, void *data) {
293 case R3000ACPU_NOTIFY_CACHE_UNISOLATED:
294 case R3000ACPU_NOTIFY_CACHE_ISOLATED:
295 new_dyna_pcsx_mem_isolate(note == R3000ACPU_NOTIFY_CACHE_ISOLATED);
297 case R3000ACPU_NOTIFY_BEFORE_SAVE:
299 case R3000ACPU_NOTIFY_AFTER_LOAD:
302 psxInt.Notify(note, data);
/*
 * Applies Config to the dynarec: mirrors Config.DisableStalls into the
 * NDHACK_NO_STALLS bit of new_dynarec_hacks, then calls
 * new_dynarec_clear_full() when the cycle multiplier or the hack flags
 * differ from the *_old values.
 * NOTE(review): the "else" between the set and clear statements and any
 * other short lines are not visible in this excerpt.
 */
307 static void ari64_apply_config()
311 if (Config.DisableStalls)
312 new_dynarec_hacks |= NDHACK_NO_STALLS;
314 new_dynarec_hacks &= ~NDHACK_NO_STALLS;
316 if (Config.cycle_multiplier != cycle_multiplier_old
317 || new_dynarec_hacks != new_dynarec_hacks_old)
319 new_dynarec_clear_full();
/*
 * Shutdown hook: cleans up the dynarec, then shuts down the memory layer.
 */
323 static void ari64_shutdown()
325 new_dynarec_cleanup();
326 new_dyna_pcsx_mem_shutdown();
340 #else // if DRC_DISABLE
/*
 * Stand-ins compiled in the #else branch (tagged DRC_DISABLE above): the
 * globals below are defined here and every new_dynarec_* / new_dyna_*
 * function is an empty body. new_dynarec_save_blocks() returns 0.
 */
342 unsigned int address;
343 int pending_exception, stop;
345 int new_dynarec_did_compile;
346 int cycle_multiplier_old;
347 int new_dynarec_hacks_pergame;
348 int new_dynarec_hacks_old;
349 int new_dynarec_hacks;
// 0x1000 bytes expressed as u32 words
352 u32 zero_mem[0x1000/4];
354 void *scratch_buf_ptr;
355 void new_dynarec_init() {}
356 void new_dyna_start(void *context) {}
357 void new_dynarec_cleanup() {}
358 void new_dynarec_clear_full() {}
359 void new_dynarec_invalidate_all_pages() {}
360 void new_dynarec_invalidate_range(unsigned int start, unsigned int end) {}
361 void new_dyna_pcsx_mem_init(void) {}
362 void new_dyna_pcsx_mem_reset(void) {}
363 void new_dyna_pcsx_mem_load_state(void) {}
364 void new_dyna_pcsx_mem_isolate(int enable) {}
365 void new_dyna_pcsx_mem_shutdown(void) {}
366 int new_dynarec_save_blocks(void *save, int size) { return 0; }
367 void new_dynarec_load_blocks(const void *save, int size) {}
/*
 * Debug helper: write `size` bytes starting at `mem` to the file `fname`.
 *
 * If `fname` cannot be opened for writing and it contains a '/', the part
 * after the last '/' is tried instead (the same file name relative to the
 * current directory). If no file can be opened, a message is printed to
 * stderr and nothing is written; a short write is reported the same way.
 */
void dump_mem(const char *fname, void *mem, size_t size)
{
	FILE *f1 = fopen(fname, "wb");

	if (f1 == NULL) {
		/* strrchr() returns NULL when there is no '/';
		 * never do pointer arithmetic on that result */
		const char *slash = strrchr(fname, '/');
		if (slash != NULL)
			f1 = fopen(slash + 1, "wb");
	}
	if (f1 == NULL) {
		fprintf(stderr, "dump_mem: can't open %s\n", fname);
		return;
	}

	if (fwrite(mem, 1, size, f1) != size)
		fprintf(stderr, "dump_mem: short write to %s\n", fname);
	fclose(f1);
}
/*
 * Reads the 32-bit word backing address a, selecting the buffer by the
 * upper 16 bits of a:
 *   0x1f80xxxx -> psxH, offset a & 0xfffc
 *   0x1f00xxxx -> psxP, offset a & 0xfffc
 *   otherwise  -> psxM, offset a & 0x1ffffc
 * The masks clear the low two bits, so the access is always word aligned.
 */
387 static u32 memcheck_read(u32 a)
389 if ((a >> 16) == 0x1f80)
391 return *(u32 *)(psxH + (a & 0xfffc));
393 if ((a >> 16) == 0x1f00)
395 return *(u32 *)(psxP + (a & 0xfffc));
397 // if ((a & ~0xe0600000) < 0x200000)
399 return *(u32 *)(psxM + (a & 0x1ffffc));
/*
 * Appends the state changes since the previous call to the binary file
 * "tracelog": changed 32-bit words of psxRegs (everything ahead of the
 * intCycle member), changed event_cycles[] entries, and the tracked
 * variables irq_test_cycle, handler_cycle, last_io_addr and io_data.
 * Previous values are kept in function-local statics.
 * NOTE(review): the lines that set `byte`, guard the fopen() and close the
 * blocks are not visible in this excerpt, and no fopen()/fwrite() result
 * check is visible.
 */
403 void do_insn_trace(void)
405 static psxRegisters oldregs;
406 static u32 event_cycles_o[PSXINT_COUNT];
// both register structs are walked as flat arrays of u32 words
407 u32 *allregs_p = (void *)&psxRegs;
408 u32 *allregs_o = (void *)&oldregs;
413 //last_io_addr = 0x5e2c8;
415 f = fopen("tracelog", "wb");
418 oldregs.code = psxRegs.code; // don't care
// emit every register word that differs from the remembered copy
419 for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
420 if (allregs_p[i] != allregs_o[i]) {
422 fwrite(&allregs_p[i], 1, 4, f);
423 allregs_o[i] = allregs_p[i];
// emit every event deadline that changed
427 for (i = 0; i < PSXINT_COUNT; i++) {
428 if (event_cycles[i] != event_cycles_o[i]) {
430 fwrite(&byte, 1, 1, f);
432 fwrite(&event_cycles[i], 1, 4, f);
433 event_cycles_o[i] = event_cycles[i];
// SAVE_IF_CHANGED(code_, name_): keeps a static copy of name_ (seeded with
// 0xbad0c0de) and writes one byte plus the 4-byte value when it changed
436 #define SAVE_IF_CHANGED(code_, name_) { \
437 static u32 old_##name_ = 0xbad0c0de; \
438 if (old_##name_ != name_) { \
440 fwrite(&byte, 1, 1, f); \
441 fwrite(&name_, 1, 4, f); \
442 old_##name_ = name_; \
445 SAVE_IF_CHANGED(0xfb, irq_test_cycle);
446 SAVE_IF_CHANGED(0xfc, handler_cycle);
447 SAVE_IF_CHANGED(0xfd, last_io_addr);
448 io_data = memcheck_read(last_io_addr);
449 SAVE_IF_CHANGED(0xfe, io_data);
451 fwrite(&byte, 1, 1, f);
// hard-coded debugging aid: dump psxM and psxH at one specific cycle count
454 if (psxRegs.cycle == 190230) {
455 dump_mem("/mnt/ntz/dev/pnd/tmp/psxram_i.dump", psxM, 0x200000);
456 dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs_i.dump", psxH, 0x10000);
/*
 * Display names for the 32-bit words of psxRegisters that precede the
 * intCycle member, in word order; the array has one slot per such word.
 * NOTE(review): rows between the groups below are not visible in this
 * excerpt, so the listed names alone do not prove that the table covers
 * every slot.
 */
464 static const char *regnames[offsetof(psxRegisters, intCycle) / 4] = {
465 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
466 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
467 "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
468 "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
470 "C0_0", "C0_1", "C0_2", "C0_3", "C0_4", "C0_5", "C0_6", "C0_7",
471 "C0_8", "C0_9", "C0_10", "C0_11", "C0_12", "C0_13", "C0_14", "C0_15",
472 "C0_16", "C0_17", "C0_18", "C0_19", "C0_20", "C0_21", "C0_22", "C0_23",
473 "C0_24", "C0_25", "C0_26", "C0_27", "C0_28", "C0_29", "C0_30", "C0_31",
475 "C2D0", "C2D1", "C2D2", "C2D3", "C2D4", "C2D5", "C2D6", "C2D7",
476 "C2D8", "C2D9", "C2D10", "C2D11", "C2D12", "C2D13", "C2D14", "C2D15",
477 "C2D16", "C2D17", "C2D18", "C2D19", "C2D20", "C2D21", "C2D22", "C2D23",
478 "C2D24", "C2D25", "C2D26", "C2D27", "C2D28", "C2D29", "C2D30", "C2D31",
480 "C2C0", "C2C1", "C2C2", "C2C3", "C2C4", "C2C5", "C2C6", "C2C7",
481 "C2C8", "C2C9", "C2C10", "C2C11", "C2C12", "C2C13", "C2C14", "C2C15",
482 "C2C16", "C2C17", "C2C18", "C2C19", "C2C20", "C2C21", "C2C22", "C2C23",
483 "C2C24", "C2C25", "C2C26", "C2C27", "C2C28", "C2C29", "C2C30", "C2C31",
485 "PC", "code", "cycle", "interrupt",
/*
 * Cursor and size helpers for the miss_log array.
 * miss_log_i is the index of the next slot to use, miss_log_len the element
 * count of miss_log, and miss_log_mask = len - 1 is used to wrap the
 * cursor.
 * NOTE(review): wrapping with a mask only works if the array length is a
 * power of two; the miss_log definition is not visible in this excerpt.
 */
493 static int miss_log_i;
494 #define miss_log_len (sizeof(miss_log)/sizeof(miss_log[0]))
495 #define miss_log_mask (miss_log_len-1)
/*
 * Records one entry in miss_log at the current cursor and advances the
 * cursor, wrapping it with miss_log_mask; the entry previously stored in
 * that slot is overwritten.
 * reg, val, val_expect, pc and cycle are stored unchanged in the fields of
 * the same names.
 */
497 static void miss_log_add(int reg, u32 val, u32 val_expect, u32 pc, u32 cycle)
499 miss_log[miss_log_i].reg = reg;
500 miss_log[miss_log_i].val = val;
501 miss_log[miss_log_i].val_expect = val_expect;
502 miss_log[miss_log_i].pc = pc;
503 miss_log[miss_log_i].cycle = cycle;
504 miss_log_i = (miss_log_i + 1) & miss_log_mask;
509 void do_insn_cmp(void)
511 extern int last_count;
512 static psxRegisters rregs;
513 static u32 mem_addr, mem_val;
514 static u32 irq_test_cycle_intr;
515 static u32 handler_cycle_intr;
516 u32 *allregs_p = (void *)&psxRegs;
517 u32 *allregs_e = (void *)&rregs;
518 u32 badregs_mask = 0;
519 static u32 ppc, failcount;
520 static u32 badregs_mask_prev;
521 int i, ret, bad = 0, fatal = 0, which_event = -1;
526 f = fopen("tracelog", "rb");
529 if ((ret = fread(&code, 1, 1, f)) <= 0)
538 fread(&which_event, 1, 1, f);
539 fread(&ev_cycles, 1, 4, f);
542 fread(&irq_test_cycle_intr, 1, 4, f);
545 fread(&handler_cycle_intr, 1, 4, f);
548 fread(&mem_addr, 1, 4, f);
551 fread(&mem_val, 1, 4, f);
554 assert(code < offsetof(psxRegisters, intCycle) / 4);
555 fread(&allregs_e[code], 1, 4, f);
563 psxRegs.code = rregs.code; // don't care
564 psxRegs.cycle += last_count;
565 //psxRegs.cycle = rregs.cycle; // needs reload in _cmp
566 psxRegs.CP0.r[9] = rregs.CP0.r[9]; // Count
568 //if (psxRegs.cycle == 166172) breakme();
570 if (which_event >= 0 && event_cycles[which_event] != ev_cycles) {
571 printf("bad ev_cycles #%d: %u %u / %u\n", which_event,
572 event_cycles[which_event], ev_cycles, psxRegs.cycle);
576 if (irq_test_cycle > irq_test_cycle_intr) {
577 printf("bad irq_test_cycle: %u %u\n", irq_test_cycle, irq_test_cycle_intr);
581 if (handler_cycle != handler_cycle_intr) {
582 printf("bad handler_cycle: %u %u\n", handler_cycle, handler_cycle_intr);
586 if (mem_val != memcheck_read(mem_addr)) {
587 printf("bad mem @%08x: %08x %08x\n", mem_addr, memcheck_read(mem_addr), mem_val);
591 if (!fatal && !memcmp(&psxRegs, &rregs, offsetof(psxRegisters, intCycle))) {
596 for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
597 if (allregs_p[i] != allregs_e[i]) {
598 miss_log_add(i, allregs_p[i], allregs_e[i], psxRegs.pc, psxRegs.cycle);
603 badregs_mask |= 1u << i;
607 if (badregs_mask_prev & badregs_mask)
612 if (!fatal && psxRegs.pc == rregs.pc && bad < 6 && failcount < 24) {
613 static int last_mcycle;
614 if (last_mcycle != psxRegs.cycle >> 20) {
615 printf("%u\n", psxRegs.cycle);
616 last_mcycle = psxRegs.cycle >> 20;
621 for (i = 0; i < miss_log_len; i++, miss_log_i = (miss_log_i + 1) & miss_log_mask)
622 printf("bad %5s: %08x %08x, pc=%08x, cycle %u\n",
623 regnames[miss_log[miss_log_i].reg], miss_log[miss_log_i].val,
624 miss_log[miss_log_i].val_expect, miss_log[miss_log_i].pc, miss_log[miss_log_i].cycle);
625 printf("-- %d\n", bad);
626 for (i = 0; i < 8; i++)
627 printf("r%d=%08x r%2d=%08x r%2d=%08x r%2d=%08x\n", i, allregs_p[i],
628 i+8, allregs_p[i+8], i+16, allregs_p[i+16], i+24, allregs_p[i+24]);
629 printf("PC: %08x/%08x, cycle %u, next %u\n", psxRegs.pc, ppc, psxRegs.cycle, next_interupt);
630 //dump_mem("/tmp/psxram.dump", psxM, 0x200000);
631 //dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs.dump", psxH, 0x10000);
634 //psxRegs.cycle = rregs.cycle + 2; // sync timing
636 badregs_mask_prev = badregs_mask;