2 * (C) Gražvydas "notaz" Ignotas, 2010-2011
4 * This work is licensed under the terms of GNU GPL version 2 or later.
5 * See the COPYING file in the top-level directory.
13 #include "../psxhle.h"
14 #include "../psxinterpreter.h"
15 #include "../r3000a.h"
16 #include "../gte_arm.h"
17 #include "../gte_neon.h"
21 #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
23 //#define evprintf printf
26 void pcsx_mtc0(u32 reg, u32 val)
28 evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
29 MTC0(&psxRegs, reg, val);
30 gen_interupt(&psxRegs.CP0);
31 if (psxRegs.CP0.n.Cause & psxRegs.CP0.n.Status & 0x0300) // possible sw irq
32 pending_exception = 1;
35 void pcsx_mtc0_ds(u32 reg, u32 val)
37 evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
38 MTC0(&psxRegs, reg, val);
41 static void new_dyna_restore(void)
44 for (i = 0; i < PSXINT_COUNT; i++)
45 event_cycles[i] = psxRegs.intCycle[i].sCycle + psxRegs.intCycle[i].cycle;
47 event_cycles[PSXINT_RCNT] = psxNextsCounter + psxNextCounter;
48 psxRegs.interrupt |= 1 << PSXINT_RCNT;
49 psxRegs.interrupt &= (1 << PSXINT_COUNT) - 1;
51 new_dyna_pcsx_mem_load_state();
54 void new_dyna_freeze(void *f, int mode)
56 const char header_save[8] = "ariblks";
57 uint32_t addrs[1024 * 4];
62 if (mode != 0) { // save
63 size = new_dynarec_save_blocks(addrs, sizeof(addrs));
67 SaveFuncs.write(f, header_save, sizeof(header_save));
68 SaveFuncs.write(f, &size, sizeof(size));
69 SaveFuncs.write(f, addrs, size);
74 bytes = SaveFuncs.read(f, header, sizeof(header));
75 if (bytes != sizeof(header) || strcmp(header, header_save)) {
77 SaveFuncs.seek(f, -bytes, SEEK_CUR);
80 SaveFuncs.read(f, &size, sizeof(size));
83 if (size > sizeof(addrs)) {
84 bytes = size - sizeof(addrs);
85 SaveFuncs.seek(f, bytes, SEEK_CUR);
88 bytes = SaveFuncs.read(f, addrs, size);
92 if (psxCpu != &psxInt)
93 new_dynarec_load_blocks(addrs, size);
96 //printf("drc: %d block info entries %s\n", size/8, mode ? "saved" : "loaded");
99 #if !defined(DRC_DISABLE) && !defined(LIGHTREC)
102 void *gte_handlers[64];
104 void *gte_handlers_nf[64] = {
105 NULL , gteRTPS_nf , NULL , NULL , NULL , NULL , gteNCLIP_nf, NULL , // 00
106 NULL , NULL , NULL , NULL , gteOP_nf , NULL , NULL , NULL , // 08
107 gteDPCS_nf, gteINTPL_nf, gteMVMVA_nf, gteNCDS_nf, gteCDP_nf, NULL , gteNCDT_nf , NULL , // 10
108 NULL , NULL , NULL , gteNCCS_nf, gteCC_nf , NULL , gteNCS_nf , NULL , // 18
109 gteNCT_nf , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20
110 gteSQR_nf , gteDCPL_nf , gteDPCT_nf , NULL , NULL , gteAVSZ3_nf, gteAVSZ4_nf, NULL , // 28
111 gteRTPT_nf, NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30
112 NULL , NULL , NULL , NULL , NULL , gteGPF_nf , gteGPL_nf , gteNCCT_nf, // 38
115 const char *gte_regnames[64] = {
116 NULL , "RTPS" , NULL , NULL , NULL , NULL , "NCLIP", NULL , // 00
117 NULL , NULL , NULL , NULL , "OP" , NULL , NULL , NULL , // 08
118 "DPCS", "INTPL", "MVMVA", "NCDS", "CDP", NULL , "NCDT" , NULL , // 10
119 NULL , NULL , NULL , "NCCS", "CC" , NULL , "NCS" , NULL , // 18
120 "NCT" , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20
121 "SQR" , "DCPL" , "DPCT" , NULL , NULL , "AVSZ3", "AVSZ4", NULL , // 28
122 "RTPT", NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30
123 NULL , NULL , NULL , NULL , NULL , "GPF" , "GPL" , "NCCT", // 38
130 #define GCBITS3(b0,b1,b2) \
131 (GCBIT(b0) | GCBIT(b1) | GCBIT(b2))
132 #define GDBITS2(b0,b1) \
133 (GDBIT(b0) | GDBIT(b1))
134 #define GDBITS3(b0,b1,b2) \
135 (GDBITS2(b0,b1) | GDBIT(b2))
136 #define GDBITS4(b0,b1,b2,b3) \
137 (GDBITS3(b0,b1,b2) | GDBIT(b3))
138 #define GDBITS5(b0,b1,b2,b3,b4) \
139 (GDBITS4(b0,b1,b2,b3) | GDBIT(b4))
140 #define GDBITS6(b0,b1,b2,b3,b4,b5) \
141 (GDBITS5(b0,b1,b2,b3,b4) | GDBIT(b5))
142 #define GDBITS7(b0,b1,b2,b3,b4,b5,b6) \
143 (GDBITS6(b0,b1,b2,b3,b4,b5) | GDBIT(b6))
144 #define GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) \
145 (GDBITS7(b0,b1,b2,b3,b4,b5,b6) | GDBIT(b7))
146 #define GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) \
147 (GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) | GDBIT(b8))
148 #define GDBITS10(b0,b1,b2,b3,b4,b5,b6,b7,b8,b9) \
149 (GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) | GDBIT(b9))
151 const uint64_t gte_reg_reads[64] = {
152 [GTE_RTPS] = 0x1f0000ff00000000ll | GDBITS7(0,1,13,14,17,18,19),
153 [GTE_NCLIP] = GDBITS3(12,13,14),
154 [GTE_OP] = GCBITS3(0,2,4) | GDBITS3(9,10,11),
155 [GTE_DPCS] = GCBITS3(21,22,23) | GDBITS4(6,8,21,22),
156 [GTE_INTPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22),
157 [GTE_MVMVA] = 0x00ffffff00000000ll | GDBITS9(0,1,2,3,4,5,9,10,11), // XXX: maybe decode further?
158 [GTE_NCDS] = 0x00ffff0000000000ll | GDBITS6(0,1,6,8,21,22),
159 [GTE_CDP] = 0x00ffe00000000000ll | GDBITS7(6,8,9,10,11,21,22),
160 [GTE_NCDT] = 0x00ffff0000000000ll | GDBITS8(0,1,2,3,4,5,6,8),
161 [GTE_NCCS] = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22),
162 [GTE_CC] = 0x001fe00000000000ll | GDBITS6(6,9,10,11,21,22),
163 [GTE_NCS] = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22),
164 [GTE_NCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
165 [GTE_SQR] = GDBITS3(9,10,11),
166 [GTE_DCPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22),
167 [GTE_DPCT] = GCBITS3(21,22,23) | GDBITS4(8,20,21,22),
168 [GTE_AVSZ3] = GCBIT(29) | GDBITS3(17,18,19),
169 [GTE_AVSZ4] = GCBIT(30) | GDBITS4(16,17,18,19),
170 [GTE_RTPT] = 0x1f0000ff00000000ll | GDBITS7(0,1,2,3,4,5,19),
171 [GTE_GPF] = GDBITS7(6,8,9,10,11,21,22),
172 [GTE_GPL] = GDBITS10(6,8,9,10,11,21,22,25,26,27),
173 [GTE_NCCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
176 // note: this excludes gteFLAG that is always written to
177 const uint64_t gte_reg_writes[64] = {
178 [GTE_RTPS] = 0x0f0f7f00ll,
179 [GTE_NCLIP] = GDBIT(24),
180 [GTE_OP] = GDBITS6(9,10,11,25,26,27),
181 [GTE_DPCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
182 [GTE_INTPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
183 [GTE_MVMVA] = GDBITS6(9,10,11,25,26,27),
184 [GTE_NCDS] = GDBITS9(9,10,11,20,21,22,25,26,27),
185 [GTE_CDP] = GDBITS9(9,10,11,20,21,22,25,26,27),
186 [GTE_NCDT] = GDBITS9(9,10,11,20,21,22,25,26,27),
187 [GTE_NCCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
188 [GTE_CC] = GDBITS9(9,10,11,20,21,22,25,26,27),
189 [GTE_NCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
190 [GTE_NCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
191 [GTE_SQR] = GDBITS6(9,10,11,25,26,27),
192 [GTE_DCPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
193 [GTE_DPCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
194 [GTE_AVSZ3] = GDBITS2(7,24),
195 [GTE_AVSZ4] = GDBITS2(7,24),
196 [GTE_RTPT] = 0x0f0f7f00ll,
197 [GTE_GPF] = GDBITS9(9,10,11,20,21,22,25,26,27),
198 [GTE_GPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
199 [GTE_NCCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
202 static int ari64_init()
204 static u32 scratch_buf[8*8*2] __attribute__((aligned(64)));
208 new_dyna_pcsx_mem_init();
210 for (i = 0; i < ARRAY_SIZE(gte_handlers); i++)
211 if (psxCP2[i] != gteNULL)
212 gte_handlers[i] = psxCP2[i];
214 #if defined(__arm__) && !defined(DRC_DBG)
215 gte_handlers[0x06] = gteNCLIP_arm;
217 gte_handlers_nf[0x01] = gteRTPS_nf_arm;
218 gte_handlers_nf[0x30] = gteRTPT_nf_arm;
221 // compiler's _nf version is still a lot slower than neon
222 // _nf_arm RTPS is roughly the same, RTPT slower
223 gte_handlers[0x01] = gte_handlers_nf[0x01] = gteRTPS_neon;
224 gte_handlers[0x30] = gte_handlers_nf[0x30] = gteRTPT_neon;
228 memcpy(gte_handlers_nf, gte_handlers, sizeof(gte_handlers_nf));
231 zeromem_ptr = zero_mem;
232 scratch_buf_ptr = scratch_buf;
237 static void ari64_reset()
239 new_dyna_pcsx_mem_reset();
240 new_dynarec_invalidate_all_pages();
242 pending_exception = 1;
245 // execute until predefined leave points
246 // (HLE softcall exit and BIOS fastboot end)
247 static void ari64_execute_until()
249 schedule_timeslice();
251 evprintf("ari64_execute %08x, %u->%u (%d)\n", psxRegs.pc,
252 psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle);
254 new_dyna_start(dynarec_local);
256 evprintf("ari64_execute end %08x, %u->%u (%d)\n", psxRegs.pc,
257 psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle);
260 static void ari64_execute()
263 ari64_execute_until();
264 evprintf("drc left @%08x\n", psxRegs.pc);
268 static void ari64_clear(u32 addr, u32 size)
270 size *= 4; /* PCSX uses DMA units (words) */
272 evprintf("ari64_clear %08x %04x\n", addr, size);
274 new_dynarec_invalidate_range(addr, addr + size);
277 static void ari64_notify(enum R3000Anote note, void *data) {
280 case R3000ACPU_NOTIFY_CACHE_UNISOLATED:
281 case R3000ACPU_NOTIFY_CACHE_ISOLATED:
282 new_dyna_pcsx_mem_isolate(note == R3000ACPU_NOTIFY_CACHE_ISOLATED);
284 case R3000ACPU_NOTIFY_BEFORE_SAVE:
286 case R3000ACPU_NOTIFY_AFTER_LOAD:
292 static void ari64_apply_config()
296 if (Config.DisableStalls)
297 new_dynarec_hacks |= NDHACK_NO_STALLS;
299 new_dynarec_hacks &= ~NDHACK_NO_STALLS;
301 if (Config.cycle_multiplier != cycle_multiplier_old
302 || new_dynarec_hacks != new_dynarec_hacks_old)
304 new_dynarec_clear_full();
308 static void ari64_shutdown()
310 new_dynarec_cleanup();
311 new_dyna_pcsx_mem_shutdown();
325 #else // if DRC_DISABLE
327 unsigned int address;
328 int pending_exception, stop;
330 int new_dynarec_did_compile;
331 int cycle_multiplier_old;
332 int new_dynarec_hacks_pergame;
333 int new_dynarec_hacks_old;
334 int new_dynarec_hacks;
337 u32 zero_mem[0x1000/4];
339 void *scratch_buf_ptr;
340 void new_dynarec_init() {}
341 void new_dyna_start(void *context) {}
342 void new_dynarec_cleanup() {}
343 void new_dynarec_clear_full() {}
344 void new_dynarec_invalidate_all_pages() {}
345 void new_dynarec_invalidate_range(unsigned int start, unsigned int end) {}
346 void new_dyna_pcsx_mem_init(void) {}
347 void new_dyna_pcsx_mem_reset(void) {}
348 void new_dyna_pcsx_mem_load_state(void) {}
349 void new_dyna_pcsx_mem_isolate(int enable) {}
350 void new_dyna_pcsx_mem_shutdown(void) {}
351 int new_dynarec_save_blocks(void *save, int size) { return 0; }
352 void new_dynarec_load_blocks(const void *save, int size) {}
363 void dump_mem(const char *fname, void *mem, size_t size)
365 FILE *f1 = fopen(fname, "wb");
367 f1 = fopen(strrchr(fname, '/') + 1, "wb");
368 fwrite(mem, 1, size, f1);
372 static u32 memcheck_read(u32 a)
374 if ((a >> 16) == 0x1f80)
376 return *(u32 *)(psxH + (a & 0xfffc));
378 if ((a >> 16) == 0x1f00)
380 return *(u32 *)(psxP + (a & 0xfffc));
382 // if ((a & ~0xe0600000) < 0x200000)
384 return *(u32 *)(psxM + (a & 0x1ffffc));
388 void do_insn_trace(void)
390 static psxRegisters oldregs;
391 static u32 event_cycles_o[PSXINT_COUNT];
392 u32 *allregs_p = (void *)&psxRegs;
393 u32 *allregs_o = (void *)&oldregs;
398 //last_io_addr = 0x5e2c8;
400 f = fopen("tracelog", "wb");
403 oldregs.code = psxRegs.code; // don't care
404 for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
405 if (allregs_p[i] != allregs_o[i]) {
407 fwrite(&allregs_p[i], 1, 4, f);
408 allregs_o[i] = allregs_p[i];
412 for (i = 0; i < PSXINT_COUNT; i++) {
413 if (event_cycles[i] != event_cycles_o[i]) {
415 fwrite(&byte, 1, 1, f);
417 fwrite(&event_cycles[i], 1, 4, f);
418 event_cycles_o[i] = event_cycles[i];
421 #define SAVE_IF_CHANGED(code_, name_) { \
422 static u32 old_##name_ = 0xbad0c0de; \
423 if (old_##name_ != name_) { \
425 fwrite(&byte, 1, 1, f); \
426 fwrite(&name_, 1, 4, f); \
427 old_##name_ = name_; \
430 SAVE_IF_CHANGED(0xfb, irq_test_cycle);
431 SAVE_IF_CHANGED(0xfc, handler_cycle);
432 SAVE_IF_CHANGED(0xfd, last_io_addr);
433 io_data = memcheck_read(last_io_addr);
434 SAVE_IF_CHANGED(0xfe, io_data);
436 fwrite(&byte, 1, 1, f);
439 if (psxRegs.cycle == 190230) {
440 dump_mem("/mnt/ntz/dev/pnd/tmp/psxram_i.dump", psxM, 0x200000);
441 dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs_i.dump", psxH, 0x10000);
449 static const char *regnames[offsetof(psxRegisters, intCycle) / 4] = {
450 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
451 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
452 "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
453 "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
455 "C0_0", "C0_1", "C0_2", "C0_3", "C0_4", "C0_5", "C0_6", "C0_7",
456 "C0_8", "C0_9", "C0_10", "C0_11", "C0_12", "C0_13", "C0_14", "C0_15",
457 "C0_16", "C0_17", "C0_18", "C0_19", "C0_20", "C0_21", "C0_22", "C0_23",
458 "C0_24", "C0_25", "C0_26", "C0_27", "C0_28", "C0_29", "C0_30", "C0_31",
460 "C2D0", "C2D1", "C2D2", "C2D3", "C2D4", "C2D5", "C2D6", "C2D7",
461 "C2D8", "C2D9", "C2D10", "C2D11", "C2D12", "C2D13", "C2D14", "C2D15",
462 "C2D16", "C2D17", "C2D18", "C2D19", "C2D20", "C2D21", "C2D22", "C2D23",
463 "C2D24", "C2D25", "C2D26", "C2D27", "C2D28", "C2D29", "C2D30", "C2D31",
465 "C2C0", "C2C1", "C2C2", "C2C3", "C2C4", "C2C5", "C2C6", "C2C7",
466 "C2C8", "C2C9", "C2C10", "C2C11", "C2C12", "C2C13", "C2C14", "C2C15",
467 "C2C16", "C2C17", "C2C18", "C2C19", "C2C20", "C2C21", "C2C22", "C2C23",
468 "C2C24", "C2C25", "C2C26", "C2C27", "C2C28", "C2C29", "C2C30", "C2C31",
470 "PC", "code", "cycle", "interrupt",
478 static int miss_log_i;
479 #define miss_log_len (sizeof(miss_log)/sizeof(miss_log[0]))
480 #define miss_log_mask (miss_log_len-1)
482 static void miss_log_add(int reg, u32 val, u32 val_expect, u32 pc, u32 cycle)
484 miss_log[miss_log_i].reg = reg;
485 miss_log[miss_log_i].val = val;
486 miss_log[miss_log_i].val_expect = val_expect;
487 miss_log[miss_log_i].pc = pc;
488 miss_log[miss_log_i].cycle = cycle;
489 miss_log_i = (miss_log_i + 1) & miss_log_mask;
494 void do_insn_cmp(void)
496 extern int last_count;
497 static psxRegisters rregs;
498 static u32 mem_addr, mem_val;
499 static u32 irq_test_cycle_intr;
500 static u32 handler_cycle_intr;
501 u32 *allregs_p = (void *)&psxRegs;
502 u32 *allregs_e = (void *)&rregs;
503 static u32 ppc, failcount;
504 int i, ret, bad = 0, fatal = 0, which_event = -1;
509 f = fopen("tracelog", "rb");
512 if ((ret = fread(&code, 1, 1, f)) <= 0)
521 fread(&which_event, 1, 1, f);
522 fread(&ev_cycles, 1, 4, f);
525 fread(&irq_test_cycle_intr, 1, 4, f);
528 fread(&handler_cycle_intr, 1, 4, f);
531 fread(&mem_addr, 1, 4, f);
534 fread(&mem_val, 1, 4, f);
537 assert(code < offsetof(psxRegisters, intCycle) / 4);
538 fread(&allregs_e[code], 1, 4, f);
546 psxRegs.code = rregs.code; // don't care
547 psxRegs.cycle += last_count;
548 //psxRegs.cycle = rregs.cycle; // needs reload in _cmp
549 psxRegs.CP0.r[9] = rregs.CP0.r[9]; // Count
551 //if (psxRegs.cycle == 166172) breakme();
553 if (which_event >= 0 && event_cycles[which_event] != ev_cycles) {
554 printf("bad ev_cycles #%d: %u %u / %u\n", which_event,
555 event_cycles[which_event], ev_cycles, psxRegs.cycle);
559 if (irq_test_cycle > irq_test_cycle_intr) {
560 printf("bad irq_test_cycle: %u %u\n", irq_test_cycle, irq_test_cycle_intr);
564 if (handler_cycle != handler_cycle_intr) {
565 printf("bad handler_cycle: %u %u\n", handler_cycle, handler_cycle_intr);
569 if (mem_val != memcheck_read(mem_addr)) {
570 printf("bad mem @%08x: %08x %08x\n", mem_addr, memcheck_read(mem_addr), mem_val);
574 if (!fatal && !memcmp(&psxRegs, &rregs, offsetof(psxRegisters, intCycle))) {
579 for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
580 if (allregs_p[i] != allregs_e[i]) {
581 miss_log_add(i, allregs_p[i], allregs_e[i], psxRegs.pc, psxRegs.cycle);
588 if (!fatal && psxRegs.pc == rregs.pc && bad < 6 && failcount < 32) {
589 static int last_mcycle;
590 if (last_mcycle != psxRegs.cycle >> 20) {
591 printf("%u\n", psxRegs.cycle);
592 last_mcycle = psxRegs.cycle >> 20;
598 for (i = 0; i < miss_log_len; i++, miss_log_i = (miss_log_i + 1) & miss_log_mask)
599 printf("bad %5s: %08x %08x, pc=%08x, cycle %u\n",
600 regnames[miss_log[miss_log_i].reg], miss_log[miss_log_i].val,
601 miss_log[miss_log_i].val_expect, miss_log[miss_log_i].pc, miss_log[miss_log_i].cycle);
602 printf("-- %d\n", bad);
603 for (i = 0; i < 8; i++)
604 printf("r%d=%08x r%2d=%08x r%2d=%08x r%2d=%08x\n", i, allregs_p[i],
605 i+8, allregs_p[i+8], i+16, allregs_p[i+16], i+24, allregs_p[i+24]);
606 printf("PC: %08x/%08x, cycle %u, next %u\n", psxRegs.pc, ppc, psxRegs.cycle, next_interupt);
607 //dump_mem("/tmp/psxram.dump", psxM, 0x200000);
608 //dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs.dump", psxH, 0x10000);
611 //psxRegs.cycle = rregs.cycle + 2; // sync timing