2 * (C) Gražvydas "notaz" Ignotas, 2010-2011
4 * This work is licensed under the terms of GNU GPL version 2 or later.
5 * See the COPYING file in the top-level directory.
13 #include "../psxhle.h"
14 #include "../psxinterpreter.h"
15 #include "../r3000a.h"
16 #include "../gte_arm.h"
17 #include "../gte_neon.h"
21 #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
23 //#define evprintf printf
// Write `val` to coprocessor-0 register `reg` on behalf of recompiled code.
// The write is forwarded to MTC0(), gen_interupt() is then called on the CP0
// state, and pending_exception is raised when a bit in mask 0x0300 is set in
// both Cause and Status.
26 void pcsx_mtc0(u32 reg, u32 val)
// Trace only; evprintf is a debug macro (see the commented-out define above).
28 evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
29 MTC0(&psxRegs, reg, val);
30 gen_interupt(&psxRegs.CP0);
31 if (psxRegs.CP0.n.Cause & psxRegs.CP0.n.Status & 0x0300) // possible sw irq
32 pending_exception = 1;
// Variant of pcsx_mtc0(): traces and forwards the write to MTC0().
// Unlike pcsx_mtc0(), no gen_interupt() call or pending_exception update is
// visible here.
// NOTE(review): "_ds" presumably means "delay slot" - confirm against callers.
35 void pcsx_mtc0_ds(u32 reg, u32 val)
37 evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
38 MTC0(&psxRegs, reg, val);
// Rebuild the dynarec-side event bookkeeping from psxRegs and let the memory
// layer reload its state.
41 static void new_dyna_restore(void)
// Every event's target is recomputed as intCycle[i].sCycle + intCycle[i].cycle.
44 for (i = 0; i < PSXINT_COUNT; i++)
45 event_cycles[i] = psxRegs.intCycle[i].sCycle + psxRegs.intCycle[i].cycle;
// The PSXINT_RCNT slot is taken from psxNextsCounter + psxNextCounter instead.
47 event_cycles[PSXINT_RCNT] = psxNextsCounter + psxNextCounter;
// Mark PSXINT_RCNT as pending, then drop any bits at or above PSXINT_COUNT.
48 psxRegs.interrupt |= 1 << PSXINT_RCNT;
49 psxRegs.interrupt &= (1 << PSXINT_COUNT) - 1;
51 new_dyna_pcsx_mem_load_state();
// Save or load dynarec block information through the SaveFuncs callbacks.
//   f    - save-state handle passed straight to SaveFuncs.read/write/seek
//   mode - non-zero saves, zero loads
// The record layout written on save is: the 8-byte tag "ariblks", the byte
// count `size`, then `size` bytes of `addrs`.
54 void new_dyna_freeze(void *f, int mode)
56 const char header_save[8] = "ariblks";
57 uint32_t addrs[1024 * 4];
62 if (mode != 0) { // save
// new_dynarec_save_blocks() fills `addrs`; its return value is used as the
// number of bytes to write below.
63 size = new_dynarec_save_blocks(addrs, sizeof(addrs));
67 SaveFuncs.write(f, header_save, sizeof(header_save));
68 SaveFuncs.write(f, &size, sizeof(size));
69 SaveFuncs.write(f, addrs, size);
// Load path: read the tag and compare it with header_save.
// NOTE(review): strcmp() depends on the bytes read from the file containing
// a NUL terminator; the declaration of `header` is not visible in this
// extract - consider memcmp() over sizeof(header_save) and confirm.
74 bytes = SaveFuncs.read(f, header, sizeof(header));
75 if (bytes != sizeof(header) || strcmp(header, header_save)) {
// Tag missing or short read: rewind by however many bytes were consumed.
77 SaveFuncs.seek(f, -bytes, SEEK_CUR);
80 SaveFuncs.read(f, &size, sizeof(size));
// A stored size larger than the local buffer: the excess byte count is
// computed and used as a relative seek offset.
83 if (size > sizeof(addrs)) {
84 bytes = size - sizeof(addrs);
85 SaveFuncs.seek(f, bytes, SEEK_CUR);
88 bytes = SaveFuncs.read(f, addrs, size);
// Block info is handed to the dynarec only when the active CPU core is not
// the interpreter (psxInt).
92 if (psxCpu != &psxInt)
93 new_dynarec_load_blocks(addrs, size);
96 //printf("drc: %d block info entries %s\n", size/8, mode ? "saved" : "loaded");
99 #if !defined(DRC_DISABLE) && !defined(LIGHTREC)
// GTE handler dispatch tables, 64 slots each.
// gte_handlers has no initializer here; ari64_init() fills it from psxCP2.
// gte_handlers_nf is statically populated with the *_nf handler variants;
// slots with no handler are NULL. The trailing "// NN" on each row is the hex
// index of that row's first slot.
// NOTE(review): "_nf" presumably means "no flags" - confirm.
102 void *gte_handlers[64];
104 void *gte_handlers_nf[64] = {
105 NULL , gteRTPS_nf , NULL , NULL , NULL , NULL , gteNCLIP_nf, NULL , // 00
106 NULL , NULL , NULL , NULL , gteOP_nf , NULL , NULL , NULL , // 08
107 gteDPCS_nf, gteINTPL_nf, gteMVMVA_nf, gteNCDS_nf, gteCDP_nf, NULL , gteNCDT_nf , NULL , // 10
108 NULL , NULL , NULL , gteNCCS_nf, gteCC_nf , NULL , gteNCS_nf , NULL , // 18
109 gteNCT_nf , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20
110 gteSQR_nf , gteDCPL_nf , gteDPCT_nf , NULL , NULL , gteAVSZ3_nf, gteAVSZ4_nf, NULL , // 28
111 gteRTPT_nf, NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30
112 NULL , NULL , NULL , NULL , NULL , gteGPF_nf , gteGPL_nf , gteNCCT_nf, // 38
// Printable names laid out with the same 64-slot indexing as
// gte_handlers_nf: each non-NULL entry names the handler in the same slot of
// that table. Unused slots are NULL.
// NOTE(review): despite the "regnames" identifier the strings are operation
// mnemonics, not register names.
115 const char *gte_regnames[64] = {
116 NULL , "RTPS" , NULL , NULL , NULL , NULL , "NCLIP", NULL , // 00
117 NULL , NULL , NULL , NULL , "OP" , NULL , NULL , NULL , // 08
118 "DPCS", "INTPL", "MVMVA", "NCDS", "CDP", NULL , "NCDT" , NULL , // 10
119 NULL , NULL , NULL , "NCCS", "CC" , NULL , "NCS" , NULL , // 18
120 "NCT" , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20
121 "SQR" , "DCPL" , "DPCT" , NULL , NULL , "AVSZ3", "AVSZ4", NULL , // 28
122 "RTPT", NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30
123 NULL , NULL , NULL , NULL , NULL , "GPF" , "GPL" , "NCCT", // 38
// Helpers that OR together several single-register masks.
// GCBITS3(a,b,c) combines three GCBIT() values; GDBITSn(...) combines n
// GDBIT() values, each one defined as the (n-1)-argument form plus one more.
// GCBIT()/GDBIT() themselves are defined on lines not visible in this
// extract.
// NOTE(review): presumably GC = GTE control register, GD = GTE data
// register - confirm against the GCBIT/GDBIT definitions.
130 #define GCBITS3(b0,b1,b2) \
131 (GCBIT(b0) | GCBIT(b1) | GCBIT(b2))
132 #define GDBITS2(b0,b1) \
133 (GDBIT(b0) | GDBIT(b1))
134 #define GDBITS3(b0,b1,b2) \
135 (GDBITS2(b0,b1) | GDBIT(b2))
136 #define GDBITS4(b0,b1,b2,b3) \
137 (GDBITS3(b0,b1,b2) | GDBIT(b3))
138 #define GDBITS5(b0,b1,b2,b3,b4) \
139 (GDBITS4(b0,b1,b2,b3) | GDBIT(b4))
140 #define GDBITS6(b0,b1,b2,b3,b4,b5) \
141 (GDBITS5(b0,b1,b2,b3,b4) | GDBIT(b5))
142 #define GDBITS7(b0,b1,b2,b3,b4,b5,b6) \
143 (GDBITS6(b0,b1,b2,b3,b4,b5) | GDBIT(b6))
144 #define GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) \
145 (GDBITS7(b0,b1,b2,b3,b4,b5,b6) | GDBIT(b7))
146 #define GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) \
147 (GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) | GDBIT(b8))
148 #define GDBITS10(b0,b1,b2,b3,b4,b5,b6,b7,b8,b9) \
149 (GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) | GDBIT(b9))
// Per-operation 64-bit masks, indexed by the GTE_* operation constants.
// Each entry ORs a literal 64-bit constant (where present) with
// GCBIT/GCBITS3 and GDBIT/GDBITSn terms. Operations without an entry are
// implicitly zero.
// NOTE(review): presumably each mask lists the GTE registers the operation
// reads, with the literal constants covering control registers in the upper
// 32 bits - confirm against the GCBIT/GDBIT definitions.
151 const uint64_t gte_reg_reads[64] = {
152 [GTE_RTPS] = 0x1f0000ff00000000ll | GDBITS7(0,1,13,14,17,18,19),
153 [GTE_NCLIP] = GDBITS3(12,13,14),
154 [GTE_OP] = GCBITS3(0,2,4) | GDBITS3(9,10,11),
155 [GTE_DPCS] = GCBITS3(21,22,23) | GDBITS4(6,8,21,22),
156 [GTE_INTPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22),
157 [GTE_MVMVA] = 0x00ffffff00000000ll | GDBITS9(0,1,2,3,4,5,9,10,11), // XXX: maybe decode further?
158 [GTE_NCDS] = 0x00ffff0000000000ll | GDBITS6(0,1,6,8,21,22),
159 [GTE_CDP] = 0x00ffe00000000000ll | GDBITS7(6,8,9,10,11,21,22),
160 [GTE_NCDT] = 0x00ffff0000000000ll | GDBITS8(0,1,2,3,4,5,6,8),
161 [GTE_NCCS] = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22),
162 [GTE_CC] = 0x001fe00000000000ll | GDBITS6(6,9,10,11,21,22),
163 [GTE_NCS] = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22),
164 [GTE_NCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
165 [GTE_SQR] = GDBITS3(9,10,11),
166 [GTE_DCPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22),
167 [GTE_DPCT] = GCBITS3(21,22,23) | GDBITS4(8,20,21,22),
168 [GTE_AVSZ3] = GCBIT(29) | GDBITS3(17,18,19),
169 [GTE_AVSZ4] = GCBIT(30) | GDBITS4(16,17,18,19),
170 [GTE_RTPT] = 0x1f0000ff00000000ll | GDBITS7(0,1,2,3,4,5,19),
171 [GTE_GPF] = GDBITS7(6,8,9,10,11,21,22),
172 [GTE_GPL] = GDBITS10(6,8,9,10,11,21,22,25,26,27),
173 [GTE_NCCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
// Per-operation 64-bit masks, indexed by the GTE_* operation constants, in
// the same layout as gte_reg_reads. Most entries are built from GDBIT terms
// only; GTE_RTPS and GTE_RTPT share the literal 0x0f0f7f00.
// NOTE(review): presumably each mask lists the GTE data registers the
// operation writes - confirm against the GDBIT definition.
176 // note: this excludes gteFLAG that is always written to
177 const uint64_t gte_reg_writes[64] = {
178 [GTE_RTPS] = 0x0f0f7f00ll,
179 [GTE_NCLIP] = GDBIT(24),
180 [GTE_OP] = GDBITS6(9,10,11,25,26,27),
181 [GTE_DPCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
182 [GTE_INTPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
183 [GTE_MVMVA] = GDBITS6(9,10,11,25,26,27),
184 [GTE_NCDS] = GDBITS9(9,10,11,20,21,22,25,26,27),
185 [GTE_CDP] = GDBITS9(9,10,11,20,21,22,25,26,27),
186 [GTE_NCDT] = GDBITS9(9,10,11,20,21,22,25,26,27),
187 [GTE_NCCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
188 [GTE_CC] = GDBITS9(9,10,11,20,21,22,25,26,27),
189 [GTE_NCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
190 [GTE_NCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
191 [GTE_SQR] = GDBITS6(9,10,11,25,26,27),
192 [GTE_DCPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
193 [GTE_DPCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
194 [GTE_AVSZ3] = GDBITS2(7,24),
195 [GTE_AVSZ4] = GDBITS2(7,24),
196 [GTE_RTPT] = 0x0f0f7f00ll,
197 [GTE_GPF] = GDBITS9(9,10,11,20,21,22,25,26,27),
198 [GTE_GPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
199 [GTE_NCCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
// One-time setup for the ari64 dynarec backend: initialises the memory
// layer, fills the GTE handler table and publishes the zero-page and scratch
// buffer pointers. Returns int; the return statement is on a line not
// visible in this extract.
202 static int ari64_init()
// Scratch area handed to the dynarec through scratch_buf_ptr; 64-byte aligned.
204 static u32 scratch_buf[8*8*2] __attribute__((aligned(64)));
208 new_dyna_pcsx_mem_init();
// Copy every psxCP2 handler except the gteNULL placeholder; those slots keep
// their previous value.
210 for (i = 0; i < ARRAY_SIZE(gte_handlers); i++)
211 if (psxCP2[i] != gteNULL)
212 gte_handlers[i] = psxCP2[i];
// ARM builds without DRC_DBG replace selected slots with hand-written
// routines.
214 #if defined(__arm__) && !defined(DRC_DBG)
215 gte_handlers[0x06] = gteNCLIP_arm;
217 gte_handlers_nf[0x01] = gteRTPS_nf_arm;
218 gte_handlers_nf[0x30] = gteRTPT_nf_arm;
221 // compiler's _nf version is still a lot slower than neon
222 // _nf_arm RTPS is roughly the same, RTPT slower
223 gte_handlers[0x01] = gte_handlers_nf[0x01] = gteRTPS_neon;
224 gte_handlers[0x30] = gte_handlers_nf[0x30] = gteRTPT_neon;
// NOTE(review): this copy overwrites the whole _nf table, including the
// overrides above; it is presumably guarded by a preprocessor conditional on
// a line not visible here - confirm.
228 memcpy(gte_handlers_nf, gte_handlers, sizeof(gte_handlers_nf));
231 zeromem_ptr = zero_mem;
232 scratch_buf_ptr = scratch_buf;
// Reset hook: resets the dynarec memory layer, invalidates every page known
// to the dynarec and sets pending_exception.
237 static void ari64_reset()
239 new_dyna_pcsx_mem_reset();
240 new_dynarec_invalidate_all_pages();
242 pending_exception = 1;
// Run recompiled code for one scheduling slice: schedule_timeslice() is
// called first, then new_dyna_start() is entered with dynarec_local as its
// context. The evprintf calls only trace pc and cycle state before and after.
245 // execute until predefined leave points
246 // (HLE softcall exit and BIOS fastboot end)
247 static void ari64_execute_until()
249 schedule_timeslice();
251 evprintf("ari64_execute %08x, %u->%u (%d)\n", psxRegs.pc,
252 psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle);
254 new_dyna_start(dynarec_local);
256 evprintf("ari64_execute end %08x, %u->%u (%d)\n", psxRegs.pc,
257 psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle);
// Execution entry point: calls ari64_execute_until() and traces the pc at
// which the dynarec returned.
// NOTE(review): the loop construct presumably sits on lines not visible in
// this extract - confirm.
260 static void ari64_execute()
263 ari64_execute_until();
264 evprintf("drc left @%08x\n", psxRegs.pc);
// Run the dynarec once via ari64_execute_until(). `caller` is compared with
// EXEC_CALLER_BOOT both before and after the run; the statements those two
// checks guard are on lines not visible in this extract.
268 static void ari64_execute_block(enum blockExecCaller caller)
270 if (caller == EXEC_CALLER_BOOT)
273 ari64_execute_until();
275 if (caller == EXEC_CALLER_BOOT)
// Invalidate recompiled code overlapping a memory range.
//   addr - start address
//   size - length in 32-bit words; converted to bytes before use
// The range passed on is [addr, addr + size) - whether `end` is treated as
// exclusive is up to new_dynarec_invalidate_range().
279 static void ari64_clear(u32 addr, u32 size)
281 size *= 4; /* PCSX uses DMA units (words) */
283 evprintf("ari64_clear %08x %04x\n", addr, size);
285 new_dynarec_invalidate_range(addr, addr + size);
// Notification hook from the CPU core.
//   note - which event occurred
//   data - not referenced on any line visible in this extract
288 static void ari64_notify(enum R3000Anote note, void *data) {
// Both cache notifications share one call; the argument is non-zero only for
// the ISOLATED case.
291 case R3000ACPU_NOTIFY_CACHE_UNISOLATED:
292 case R3000ACPU_NOTIFY_CACHE_ISOLATED:
293 new_dyna_pcsx_mem_isolate(note == R3000ACPU_NOTIFY_CACHE_ISOLATED);
// The bodies of the two save-state cases are on lines not visible here.
295 case R3000ACPU_NOTIFY_BEFORE_SAVE:
297 case R3000ACPU_NOTIFY_AFTER_LOAD:
// Sync dynarec settings with the emulator configuration.
// Config.DisableStalls controls NDHACK_NO_STALLS in new_dynarec_hacks; the
// `else` separating the set and clear statements is presumably on a line not
// visible in this extract.
// If the cycle multiplier or the hack flags differ from their *_old copies,
// all translated code is discarded through new_dynarec_clear_full().
303 static void ari64_apply_config()
307 if (Config.DisableStalls)
308 new_dynarec_hacks |= NDHACK_NO_STALLS;
310 new_dynarec_hacks &= ~NDHACK_NO_STALLS;
312 if (Config.cycle_multiplier != cycle_multiplier_old
313 || new_dynarec_hacks != new_dynarec_hacks_old)
315 new_dynarec_clear_full();
// Tear down the backend: dynarec cleanup first, then the memory layer.
319 static void ari64_shutdown()
321 new_dynarec_cleanup();
322 new_dyna_pcsx_mem_shutdown();
336 #else // if DRC_DISABLE
// Fallback definitions for builds that take the #else branch of
// !defined(DRC_DISABLE) && !defined(LIGHTREC).
// They provide the globals and entry points the rest of this file references.
// Every function is an empty stub; new_dynarec_save_blocks() reports 0 bytes
// saved.
338 unsigned int address;
339 int pending_exception, stop;
341 int new_dynarec_did_compile;
342 int cycle_multiplier_old;
343 int new_dynarec_hacks_pergame;
344 int new_dynarec_hacks_old;
345 int new_dynarec_hacks;
348 u32 zero_mem[0x1000/4];
350 void *scratch_buf_ptr;
351 void new_dynarec_init() {}
352 void new_dyna_start(void *context) {}
353 void new_dynarec_cleanup() {}
354 void new_dynarec_clear_full() {}
355 void new_dynarec_invalidate_all_pages() {}
356 void new_dynarec_invalidate_range(unsigned int start, unsigned int end) {}
357 void new_dyna_pcsx_mem_init(void) {}
358 void new_dyna_pcsx_mem_reset(void) {}
359 void new_dyna_pcsx_mem_load_state(void) {}
360 void new_dyna_pcsx_mem_isolate(int enable) {}
361 void new_dyna_pcsx_mem_shutdown(void) {}
362 int new_dynarec_save_blocks(void *save, int size) { return 0; }
363 void new_dynarec_load_blocks(const void *save, int size) {}
/*
 * Debug helper: write `size` bytes starting at `mem` to the file `fname`.
 *
 * If `fname` cannot be opened for writing, the last path component of
 * `fname` is tried instead, relative to the current directory. When no file
 * can be opened the dump is skipped with a message on stderr, so a missing
 * debug directory cannot crash the emulator.
 *
 *   fname - destination path
 *   mem   - start of the region to dump
 *   size  - number of bytes to write
 */
void dump_mem(const char *fname, void *mem, size_t size)
{
	FILE *f1 = fopen(fname, "wb");

	if (f1 == NULL) {
		/* strrchr() returns NULL when there is no '/', so only retry
		 * when a shorter name actually exists. */
		const char *slash = strrchr(fname, '/');
		if (slash != NULL)
			f1 = fopen(slash + 1, "wb");
	}
	if (f1 == NULL) {
		fprintf(stderr, "dump_mem: failed to open %s\n", fname);
		return;
	}

	if (fwrite(mem, 1, size, f1) != size)
		fprintf(stderr, "dump_mem: short write to %s\n", fname);
	fclose(f1);
}
// Fetch the aligned 32-bit word at emulated address `a` for trace
// comparison. The upper 16 address bits select the backing buffer:
//   0x1f80 -> psxH, offset a & 0xfffc
//   0x1f00 -> psxP, offset a & 0xfffc
//   other  -> psxM, offset a & 0x1ffffc
// The low two address bits are always masked off.
383 static u32 memcheck_read(u32 a)
385 if ((a >> 16) == 0x1f80)
387 return *(u32 *)(psxH + (a & 0xfffc));
389 if ((a >> 16) == 0x1f00)
391 return *(u32 *)(psxP + (a & 0xfffc));
393 // if ((a & ~0xe0600000) < 0x200000)
395 return *(u32 *)(psxM + (a & 0x1ffffc));
// Debug tracer: appends the state that changed since the previous call to
// the binary file "tracelog". Previous values are kept in function-local
// statics.
399 void do_insn_trace(void)
401 static psxRegisters oldregs;
402 static u32 event_cycles_o[PSXINT_COUNT];
// psxRegs and its shadow copy are walked as flat arrays of 32-bit words.
403 u32 *allregs_p = (void *)&psxRegs;
404 u32 *allregs_o = (void *)&oldregs;
409 //last_io_addr = 0x5e2c8;
411 f = fopen("tracelog", "wb");
414 oldregs.code = psxRegs.code; // don't care
// Every word located before the intCycle member is compared; changed words
// are written out and the shadow copy is updated.
415 for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
416 if (allregs_p[i] != allregs_o[i]) {
418 fwrite(&allregs_p[i], 1, 4, f);
419 allregs_o[i] = allregs_p[i];
// Same change detection for the per-event target cycles.
423 for (i = 0; i < PSXINT_COUNT; i++) {
424 if (event_cycles[i] != event_cycles_o[i]) {
426 fwrite(&byte, 1, 1, f);
428 fwrite(&event_cycles[i], 1, 4, f);
429 event_cycles_o[i] = event_cycles[i];
// SAVE_IF_CHANGED keeps one static shadow per tracked variable (initially
// 0xbad0c0de) and writes one byte plus the 4-byte value when it changes.
432 #define SAVE_IF_CHANGED(code_, name_) { \
433 static u32 old_##name_ = 0xbad0c0de; \
434 if (old_##name_ != name_) { \
436 fwrite(&byte, 1, 1, f); \
437 fwrite(&name_, 1, 4, f); \
438 old_##name_ = name_; \
441 SAVE_IF_CHANGED(0xfb, irq_test_cycle);
442 SAVE_IF_CHANGED(0xfc, handler_cycle);
443 SAVE_IF_CHANGED(0xfd, last_io_addr);
// The word at the last I/O address is sampled so memory contents are traced.
444 io_data = memcheck_read(last_io_addr);
445 SAVE_IF_CHANGED(0xfe, io_data);
447 fwrite(&byte, 1, 1, f);
// Hard-coded developer hook: dump RAM and the psxH area at one specific cycle.
450 if (psxRegs.cycle == 190230) {
451 dump_mem("/mnt/ntz/dev/pnd/tmp/psxram_i.dump", psxM, 0x200000);
452 dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs_i.dump", psxH, 0x10000);
// Display names for the 32-bit words of psxRegisters that precede intCycle;
// the array length comes from that member's offset. do_insn_cmp() indexes it
// with the word index stored in miss_log.
// The names are given as four groups of 32 ("r", "C0_", "C2D", "C2C")
// followed by PC, code, cycle and interrupt.
460 static const char *regnames[offsetof(psxRegisters, intCycle) / 4] = {
461 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
462 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
463 "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
464 "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
466 "C0_0", "C0_1", "C0_2", "C0_3", "C0_4", "C0_5", "C0_6", "C0_7",
467 "C0_8", "C0_9", "C0_10", "C0_11", "C0_12", "C0_13", "C0_14", "C0_15",
468 "C0_16", "C0_17", "C0_18", "C0_19", "C0_20", "C0_21", "C0_22", "C0_23",
469 "C0_24", "C0_25", "C0_26", "C0_27", "C0_28", "C0_29", "C0_30", "C0_31",
471 "C2D0", "C2D1", "C2D2", "C2D3", "C2D4", "C2D5", "C2D6", "C2D7",
472 "C2D8", "C2D9", "C2D10", "C2D11", "C2D12", "C2D13", "C2D14", "C2D15",
473 "C2D16", "C2D17", "C2D18", "C2D19", "C2D20", "C2D21", "C2D22", "C2D23",
474 "C2D24", "C2D25", "C2D26", "C2D27", "C2D28", "C2D29", "C2D30", "C2D31",
476 "C2C0", "C2C1", "C2C2", "C2C3", "C2C4", "C2C5", "C2C6", "C2C7",
477 "C2C8", "C2C9", "C2C10", "C2C11", "C2C12", "C2C13", "C2C14", "C2C15",
478 "C2C16", "C2C17", "C2C18", "C2C19", "C2C20", "C2C21", "C2C22", "C2C23",
479 "C2C24", "C2C25", "C2C26", "C2C27", "C2C28", "C2C29", "C2C30", "C2C31",
481 "PC", "code", "cycle", "interrupt",
489 static int miss_log_i;
490 #define miss_log_len (sizeof(miss_log)/sizeof(miss_log[0]))
491 #define miss_log_mask (miss_log_len-1)
// Record one register mismatch in the miss_log ring buffer.
//   reg        - index of the mismatching register word
//   val        - value observed
//   val_expect - value expected
//   pc, cycle  - emulator position at the time of the mismatch
// The write index advances with `& miss_log_mask` (length - 1), so the
// wrap-around is only correct when the buffer length is a power of two.
493 static void miss_log_add(int reg, u32 val, u32 val_expect, u32 pc, u32 cycle)
495 miss_log[miss_log_i].reg = reg;
496 miss_log[miss_log_i].val = val;
497 miss_log[miss_log_i].val_expect = val_expect;
498 miss_log[miss_log_i].pc = pc;
499 miss_log[miss_log_i].cycle = cycle;
500 miss_log_i = (miss_log_i + 1) & miss_log_mask;
505 void do_insn_cmp(void)
507 extern int last_count;
508 static psxRegisters rregs;
509 static u32 mem_addr, mem_val;
510 static u32 irq_test_cycle_intr;
511 static u32 handler_cycle_intr;
512 u32 *allregs_p = (void *)&psxRegs;
513 u32 *allregs_e = (void *)&rregs;
514 static u32 ppc, failcount;
515 int i, ret, bad = 0, fatal = 0, which_event = -1;
520 f = fopen("tracelog", "rb");
523 if ((ret = fread(&code, 1, 1, f)) <= 0)
532 fread(&which_event, 1, 1, f);
533 fread(&ev_cycles, 1, 4, f);
536 fread(&irq_test_cycle_intr, 1, 4, f);
539 fread(&handler_cycle_intr, 1, 4, f);
542 fread(&mem_addr, 1, 4, f);
545 fread(&mem_val, 1, 4, f);
548 assert(code < offsetof(psxRegisters, intCycle) / 4);
549 fread(&allregs_e[code], 1, 4, f);
557 psxRegs.code = rregs.code; // don't care
558 psxRegs.cycle += last_count;
559 //psxRegs.cycle = rregs.cycle; // needs reload in _cmp
560 psxRegs.CP0.r[9] = rregs.CP0.r[9]; // Count
562 //if (psxRegs.cycle == 166172) breakme();
564 if (which_event >= 0 && event_cycles[which_event] != ev_cycles) {
565 printf("bad ev_cycles #%d: %u %u / %u\n", which_event,
566 event_cycles[which_event], ev_cycles, psxRegs.cycle);
570 if (irq_test_cycle > irq_test_cycle_intr) {
571 printf("bad irq_test_cycle: %u %u\n", irq_test_cycle, irq_test_cycle_intr);
575 if (handler_cycle != handler_cycle_intr) {
576 printf("bad handler_cycle: %u %u\n", handler_cycle, handler_cycle_intr);
580 if (mem_val != memcheck_read(mem_addr)) {
581 printf("bad mem @%08x: %08x %08x\n", mem_addr, memcheck_read(mem_addr), mem_val);
585 if (!fatal && !memcmp(&psxRegs, &rregs, offsetof(psxRegisters, intCycle))) {
590 for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
591 if (allregs_p[i] != allregs_e[i]) {
592 miss_log_add(i, allregs_p[i], allregs_e[i], psxRegs.pc, psxRegs.cycle);
599 if (!fatal && psxRegs.pc == rregs.pc && bad < 6 && failcount < 32) {
600 static int last_mcycle;
601 if (last_mcycle != psxRegs.cycle >> 20) {
602 printf("%u\n", psxRegs.cycle);
603 last_mcycle = psxRegs.cycle >> 20;
609 for (i = 0; i < miss_log_len; i++, miss_log_i = (miss_log_i + 1) & miss_log_mask)
610 printf("bad %5s: %08x %08x, pc=%08x, cycle %u\n",
611 regnames[miss_log[miss_log_i].reg], miss_log[miss_log_i].val,
612 miss_log[miss_log_i].val_expect, miss_log[miss_log_i].pc, miss_log[miss_log_i].cycle);
613 printf("-- %d\n", bad);
614 for (i = 0; i < 8; i++)
615 printf("r%d=%08x r%2d=%08x r%2d=%08x r%2d=%08x\n", i, allregs_p[i],
616 i+8, allregs_p[i+8], i+16, allregs_p[i+16], i+24, allregs_p[i+24]);
617 printf("PC: %08x/%08x, cycle %u, next %u\n", psxRegs.pc, ppc, psxRegs.cycle, next_interupt);
618 //dump_mem("/tmp/psxram.dump", psxM, 0x200000);
619 //dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs.dump", psxH, 0x10000);
622 //psxRegs.cycle = rregs.cycle + 2; // sync timing