2 * (C) Gražvydas "notaz" Ignotas, 2010-2011
4 * This work is licensed under the terms of GNU GPL version 2 or later.
5 * See the COPYING file in the top-level directory.
13 #include "../psxhle.h"
14 #include "../psxinterpreter.h"
15 #include "../psxcounters.h"
16 #include "../r3000a.h"
17 #include "../gte_arm.h"
18 #include "../gte_neon.h"
/*
 * Number of elements in a true array (not a pointer).
 * The argument is parenthesized before indexing so that expressions with
 * lower precedence than [] (a dereference or a cast, for example) are
 * measured correctly.
 */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
24 //#define evprintf printf
27 void pcsx_mtc0(u32 reg, u32 val)
29 evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
30 MTC0(&psxRegs, reg, val);
31 gen_interupt(&psxRegs.CP0);
33 //if (psxRegs.CP0.n.Cause & psxRegs.CP0.n.SR & 0x0300) // possible sw irq
34 if ((psxRegs.pc & 0x803ffeff) == 0x80000080)
35 pending_exception = 1;
38 void pcsx_mtc0_ds(u32 reg, u32 val)
40 evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
41 MTC0(&psxRegs, reg, val);
44 static void new_dyna_restore(void)
47 for (i = 0; i < PSXINT_COUNT; i++)
48 event_cycles[i] = psxRegs.intCycle[i].sCycle + psxRegs.intCycle[i].cycle;
50 event_cycles[PSXINT_RCNT] = psxNextsCounter + psxNextCounter;
51 psxRegs.interrupt |= 1 << PSXINT_RCNT;
52 psxRegs.interrupt &= (1 << PSXINT_COUNT) - 1;
54 new_dyna_pcsx_mem_load_state();
/*
 * Save or load the dynarec block list through SaveFuncs.
 *
 * f    - handle passed straight to SaveFuncs.write/read/seek
 * mode - nonzero: save; zero: load
 *
 * Save: the data produced by new_dynarec_save_blocks() is written after
 * the 8-byte header_save tag and the size field.
 * Load: the tag is read back and compared, then the size and the block
 * data are read and handed to new_dynarec_load_blocks(), unless psxCpu is
 * the interpreter (psxInt).
 *
 * NOTE(review): several short lines of this function (local declarations,
 * early returns, the else branch) are not visible in this view.
 */
57 void new_dyna_freeze(void *f, int mode)
59 const char header_save[8] = "ariblks";
60 uint32_t addrs[1024 * 4];
65 if (mode != 0) { // save
66 size = new_dynarec_save_blocks(addrs, sizeof(addrs));
// layout on disk: 8-byte tag, size field, then `size` bytes of block data
70 SaveFuncs.write(f, header_save, sizeof(header_save));
71 SaveFuncs.write(f, &size, sizeof(size));
72 SaveFuncs.write(f, addrs, size);
77 bytes = SaveFuncs.read(f, header, sizeof(header));
// NOTE(review): header comes straight from the file and strcmp() needs it
// to be NUL-terminated; memcmp() over sizeof(header) would not - confirm.
78 if (bytes != sizeof(header) || strcmp(header, header_save)) {
// step back over whatever was just read
80 SaveFuncs.seek(f, -bytes, SEEK_CUR);
// NOTE(review): the result of this read is not checked in the visible code
83 SaveFuncs.read(f, &size, sizeof(size));
// stored data is larger than addrs[]: seek forward past the excess
86 if (size > sizeof(addrs)) {
87 bytes = size - sizeof(addrs);
88 SaveFuncs.seek(f, bytes, SEEK_CUR);
91 bytes = SaveFuncs.read(f, addrs, size);
// the block list is only handed over when the interpreter is not the active CPU
95 if (psxCpu != &psxInt)
96 new_dynarec_load_blocks(addrs, size);
99 //printf("drc: %d block info entries %s\n", size/8, mode ? "saved" : "loaded");
102 #if !defined(DRC_DISABLE) && !defined(LIGHTREC)
105 void *gte_handlers[64];
107 void *gte_handlers_nf[64] = {
108 NULL , gteRTPS_nf , NULL , NULL , NULL , NULL , gteNCLIP_nf, NULL , // 00
109 NULL , NULL , NULL , NULL , gteOP_nf , NULL , NULL , NULL , // 08
110 gteDPCS_nf, gteINTPL_nf, gteMVMVA_nf, gteNCDS_nf, gteCDP_nf, NULL , gteNCDT_nf , NULL , // 10
111 NULL , NULL , NULL , gteNCCS_nf, gteCC_nf , NULL , gteNCS_nf , NULL , // 18
112 gteNCT_nf , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20
113 gteSQR_nf , gteDCPL_nf , gteDPCT_nf , NULL , NULL , gteAVSZ3_nf, gteAVSZ4_nf, NULL , // 28
114 gteRTPT_nf, NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30
115 NULL , NULL , NULL , NULL , NULL , gteGPF_nf , gteGPL_nf , gteNCCT_nf, // 38
/*
 * Printable names for the 64 GTE handler slots.
 * Slots that are not listed are NULL.
 */
const char *gte_regnames[64] = {
	[0x01] = "RTPS",
	[0x06] = "NCLIP",
	[0x0c] = "OP",
	[0x10] = "DPCS",
	[0x11] = "INTPL",
	[0x12] = "MVMVA",
	[0x13] = "NCDS",
	[0x14] = "CDP",
	[0x16] = "NCDT",
	[0x1b] = "NCCS",
	[0x1c] = "CC",
	[0x1e] = "NCS",
	[0x20] = "NCT",
	[0x28] = "SQR",
	[0x29] = "DCPL",
	[0x2a] = "DPCT",
	[0x2d] = "AVSZ3",
	[0x2e] = "AVSZ4",
	[0x30] = "RTPT",
	[0x3d] = "GPF",
	[0x3e] = "GPL",
	[0x3f] = "NCCT",
};
/*
 * Helpers that OR together several GCBIT()/GDBIT() masks.
 * GDBITSn takes n bit numbers; each one is built from the next smaller
 * helper plus one more GDBIT().
 */
#define GCBITS3(b0,b1,b2) (GCBIT(b0) | GCBIT(b1) | GCBIT(b2))

#define GDBITS2(b0,b1) (GDBIT(b0) | GDBIT(b1))
#define GDBITS3(b0,b1,b2) (GDBIT(b0) | GDBITS2(b1,b2))
#define GDBITS4(b0,b1,b2,b3) (GDBIT(b0) | GDBITS3(b1,b2,b3))
#define GDBITS5(b0,b1,b2,b3,b4) (GDBIT(b0) | GDBITS4(b1,b2,b3,b4))
#define GDBITS6(b0,b1,b2,b3,b4,b5) (GDBIT(b0) | GDBITS5(b1,b2,b3,b4,b5))
#define GDBITS7(b0,b1,b2,b3,b4,b5,b6) (GDBIT(b0) | GDBITS6(b1,b2,b3,b4,b5,b6))
#define GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) (GDBIT(b0) | GDBITS7(b1,b2,b3,b4,b5,b6,b7))
#define GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) (GDBIT(b0) | GDBITS8(b1,b2,b3,b4,b5,b6,b7,b8))
#define GDBITS10(b0,b1,b2,b3,b4,b5,b6,b7,b8,b9) (GDBIT(b0) | GDBITS9(b1,b2,b3,b4,b5,b6,b7,b8,b9))
154 const uint64_t gte_reg_reads[64] = {
155 [GTE_RTPS] = 0x1f0000ff00000000ll | GDBITS7(0,1,13,14,17,18,19),
156 [GTE_NCLIP] = GDBITS3(12,13,14),
157 [GTE_OP] = GCBITS3(0,2,4) | GDBITS3(9,10,11),
158 [GTE_DPCS] = GCBITS3(21,22,23) | GDBITS4(6,8,21,22),
159 [GTE_INTPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22),
160 [GTE_MVMVA] = 0x00ffffff00000000ll | GDBITS9(0,1,2,3,4,5,9,10,11), // XXX: maybe decode further?
161 [GTE_NCDS] = 0x00ffff0000000000ll | GDBITS6(0,1,6,8,21,22),
162 [GTE_CDP] = 0x00ffe00000000000ll | GDBITS7(6,8,9,10,11,21,22),
163 [GTE_NCDT] = 0x00ffff0000000000ll | GDBITS8(0,1,2,3,4,5,6,8),
164 [GTE_NCCS] = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22),
165 [GTE_CC] = 0x001fe00000000000ll | GDBITS6(6,9,10,11,21,22),
166 [GTE_NCS] = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22),
167 [GTE_NCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
168 [GTE_SQR] = GDBITS3(9,10,11),
169 [GTE_DCPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22),
170 [GTE_DPCT] = GCBITS3(21,22,23) | GDBITS4(8,20,21,22),
171 [GTE_AVSZ3] = GCBIT(29) | GDBITS3(17,18,19),
172 [GTE_AVSZ4] = GCBIT(30) | GDBITS4(16,17,18,19),
173 [GTE_RTPT] = 0x1f0000ff00000000ll | GDBITS7(0,1,2,3,4,5,19),
174 [GTE_GPF] = GDBITS7(6,8,9,10,11,21,22),
175 [GTE_GPL] = GDBITS10(6,8,9,10,11,21,22,25,26,27),
176 [GTE_NCCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
179 // note: this excludes gteFLAG that is always written to
180 const uint64_t gte_reg_writes[64] = {
181 [GTE_RTPS] = 0x0f0f7f00ll,
182 [GTE_NCLIP] = GDBIT(24),
183 [GTE_OP] = GDBITS6(9,10,11,25,26,27),
184 [GTE_DPCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
185 [GTE_INTPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
186 [GTE_MVMVA] = GDBITS6(9,10,11,25,26,27),
187 [GTE_NCDS] = GDBITS9(9,10,11,20,21,22,25,26,27),
188 [GTE_CDP] = GDBITS9(9,10,11,20,21,22,25,26,27),
189 [GTE_NCDT] = GDBITS9(9,10,11,20,21,22,25,26,27),
190 [GTE_NCCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
191 [GTE_CC] = GDBITS9(9,10,11,20,21,22,25,26,27),
192 [GTE_NCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
193 [GTE_NCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
194 [GTE_SQR] = GDBITS6(9,10,11,25,26,27),
195 [GTE_DCPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
196 [GTE_DPCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
197 [GTE_AVSZ3] = GDBITS2(7,24),
198 [GTE_AVSZ4] = GDBITS2(7,24),
199 [GTE_RTPT] = 0x0f0f7f00ll,
200 [GTE_GPF] = GDBITS9(9,10,11,20,21,22,25,26,27),
201 [GTE_GPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
202 [GTE_NCCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
/*
 * Init hook of the ari64 dynarec.
 * Visible steps: initialise the memory layer, copy every psxCP2[] entry
 * that is not gteNULL into gte_handlers[], apply the ARM-specific handler
 * overrides, and publish zero_mem / scratch_buf through zeromem_ptr /
 * scratch_buf_ptr.
 *
 * NOTE(review): several short lines (declarations, nested preprocessor
 * guards, the return statement) are not visible in this view.
 */
205 static int ari64_init()
// 64-byte aligned static buffer, exposed below through scratch_buf_ptr
207 static u32 scratch_buf[8*8*2] __attribute__((aligned(64)));
211 new_dyna_pcsx_mem_init();
// take over every GTE handler except the gteNULL placeholder
213 for (i = 0; i < ARRAY_SIZE(gte_handlers); i++)
214 if (psxCP2[i] != gteNULL)
215 gte_handlers[i] = psxCP2[i];
// ARM builds without DRC_DBG: slot 0x06 is NCLIP, 0x01 RTPS, 0x30 RTPT
// NOTE(review): the guards selecting the _nf_arm and _neon pairs below
// are not visible here
217 #if defined(__arm__) && !defined(DRC_DBG)
218 gte_handlers[0x06] = gteNCLIP_arm;
220 gte_handlers_nf[0x01] = gteRTPS_nf_arm;
221 gte_handlers_nf[0x30] = gteRTPT_nf_arm;
224 // compiler's _nf version is still a lot slower than neon
225 // _nf_arm RTPS is roughly the same, RTPT slower
226 gte_handlers[0x01] = gte_handlers_nf[0x01] = gteRTPS_neon;
227 gte_handlers[0x30] = gte_handlers_nf[0x30] = gteRTPT_neon;
// make the _nf table a copy of the regular table
// NOTE(review): the condition guarding this copy is not visible here
231 memcpy(gte_handlers_nf, gte_handlers, sizeof(gte_handlers_nf));
234 zeromem_ptr = zero_mem;
235 scratch_buf_ptr = scratch_buf;
/*
 * Reset hook: resets the dynarec memory layer, calls
 * new_dynarec_invalidate_all_pages() and sets pending_exception.
 * NOTE(review): one short statement between these calls is not visible
 * in this view.
 */
240 static void ari64_reset()
242 new_dyna_pcsx_mem_reset();
243 new_dynarec_invalidate_all_pages();
245 pending_exception = 1;
248 // execute until predefined leave points
249 // (HLE softcall exit and BIOS fastboot end)
250 static void ari64_execute_until()
252 evprintf("ari64_execute %08x, %u->%u (%d)\n", psxRegs.pc,
253 psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle);
255 new_dyna_start(dynarec_local);
257 evprintf("ari64_execute end %08x, %u->%u (%d)\n", psxRegs.pc,
258 psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle);
/*
 * Execute hook: schedules a timeslice, enters the dynarec through
 * ari64_execute_until() and logs the pc at which it left.
 * NOTE(review): any control flow wrapped around these statements is not
 * visible in this view.
 */
261 static void ari64_execute()
264 schedule_timeslice();
265 ari64_execute_until();
266 evprintf("drc left @%08x\n", psxRegs.pc);
/*
 * Run the dynarec with next_interupt set one cycle ahead of the current
 * cycle count.
 *
 * caller - EXEC_CALLER_BOOT gets extra handling before and after the run.
 * NOTE(review): the statements controlled by the two EXEC_CALLER_BOOT
 * checks are not visible in this view.
 */
270 static void ari64_execute_block(enum blockExecCaller caller)
272 if (caller == EXEC_CALLER_BOOT)
// place the next event one cycle from now before entering the dynarec
275 next_interupt = psxRegs.cycle + 1;
276 ari64_execute_until();
278 if (caller == EXEC_CALLER_BOOT)
282 static void ari64_clear(u32 addr, u32 size)
284 size *= 4; /* PCSX uses DMA units (words) */
286 evprintf("ari64_clear %08x %04x\n", addr, size);
288 new_dynarec_invalidate_range(addr, addr + size);
/*
 * Notification hook.
 *
 * note - event being reported
 * data - opaque payload, passed on unchanged to psxInt.Notify()
 *
 * NOTE(review): the switch skeleton (switch/break/closing lines) is not
 * visible in this view, so which cases reach which call is not fully
 * established here.
 */
291 static void ari64_notify(enum R3000Anote note, void *data) {
// both cache notifications share one call; the argument is nonzero only
// for the ISOLATED one
294 case R3000ACPU_NOTIFY_CACHE_UNISOLATED:
295 case R3000ACPU_NOTIFY_CACHE_ISOLATED:
296 new_dyna_pcsx_mem_isolate(note == R3000ACPU_NOTIFY_CACHE_ISOLATED);
298 case R3000ACPU_NOTIFY_BEFORE_SAVE:
300 case R3000ACPU_NOTIFY_AFTER_LOAD:
// hands the notification to the interpreter's Notify hook
303 psxInt.Notify(note, data);
/*
 * Apply configuration changes to the dynarec: sets or clears the
 * NDHACK_NO_STALLS bit of new_dynarec_hacks and calls
 * new_dynarec_clear_full() when the cycle multiplier or the hack flags
 * differ from their *_old copies.
 * NOTE(review): some short lines (presumably the else of the first if
 * and the refresh of the *_old copies) are not visible in this view.
 */
308 static void ari64_apply_config()
312 if (Config.DisableStalls)
313 new_dynarec_hacks |= NDHACK_NO_STALLS;
// presumably the else branch of the check above - confirm
315 new_dynarec_hacks &= ~NDHACK_NO_STALLS;
317 if (Config.cycle_multiplier != cycle_multiplier_old
318 || new_dynarec_hacks != new_dynarec_hacks_old)
320 new_dynarec_clear_full();
/*
 * Shutdown hook: calls the dynarec cleanup routine first, then shuts the
 * dynarec memory layer down.
 */
static void ari64_shutdown()
{
	new_dynarec_cleanup();

	new_dyna_pcsx_mem_shutdown();
}
341 #else // if DRC_DISABLE
343 unsigned int address;
344 int pending_exception, stop;
346 int new_dynarec_did_compile;
347 int cycle_multiplier_old;
348 int new_dynarec_hacks_pergame;
349 int new_dynarec_hacks_old;
350 int new_dynarec_hacks;
353 u32 zero_mem[0x1000/4];
355 void *scratch_buf_ptr;
356 void new_dynarec_init() {}
357 void new_dyna_start(void *context) {}
358 void new_dynarec_cleanup() {}
359 void new_dynarec_clear_full() {}
360 void new_dynarec_invalidate_all_pages() {}
361 void new_dynarec_invalidate_range(unsigned int start, unsigned int end) {}
362 void new_dyna_pcsx_mem_init(void) {}
363 void new_dyna_pcsx_mem_reset(void) {}
364 void new_dyna_pcsx_mem_load_state(void) {}
365 void new_dyna_pcsx_mem_isolate(int enable) {}
366 void new_dyna_pcsx_mem_shutdown(void) {}
367 int new_dynarec_save_blocks(void *save, int size) { return 0; }
368 void new_dynarec_load_blocks(const void *save, int size) {}
/*
 * Debug helper: write `size` bytes starting at `mem` to the file `fname`.
 *
 * If `fname` cannot be opened and it contains a '/', the part after the
 * last '/' is tried instead, i.e. the same file name relative to the
 * current directory.  When no file can be opened the function returns
 * without writing anything.
 */
void dump_mem(const char *fname, void *mem, size_t size)
{
	const char *slash;
	FILE *f1 = fopen(fname, "wb");

	if (f1 == NULL) {
		/* strrchr() yields NULL for a name without '/'; only retry
		 * when there is a shorter name to fall back to */
		slash = strrchr(fname, '/');
		if (slash != NULL)
			f1 = fopen(slash + 1, "wb");
	}
	if (f1 == NULL)
		return;

	fwrite(mem, 1, size, f1);
	fclose(f1);
}
388 static u32 memcheck_read(u32 a)
390 if ((a >> 16) == 0x1f80)
392 return *(u32 *)(psxH + (a & 0xfffc));
394 if ((a >> 16) == 0x1f00)
396 return *(u32 *)(psxP + (a & 0xfffc));
398 // if ((a & ~0xe0600000) < 0x200000)
400 return *(u32 *)(psxM + (a & 0x1ffffc));
/*
 * Append the state changes since the previous call to the file "tracelog".
 * Changes are detected against static copies kept between calls and cover:
 * the 32-bit words of psxRegs located before intCycle, the event_cycles[]
 * entries, irq_test_cycle, handler_cycle, last_io_addr and the word read
 * at last_io_addr.
 *
 * NOTE(review): several short statements (local declarations, the values
 * assigned to `byte`, part of each record, any guard around the dump
 * block) are not visible in this view, so the exact record layout is not
 * documented here.
 */
404 void do_insn_trace(void)
// copies of the last logged state
406 static psxRegisters oldregs;
407 static u32 event_cycles_o[PSXINT_COUNT];
// psxRegs and its shadow copy, viewed as arrays of 32-bit words
408 u32 *allregs_p = (void *)&psxRegs;
409 u32 *allregs_o = (void *)&oldregs;
414 //last_io_addr = 0x5e2c8;
416 f = fopen("tracelog", "wb");
// `code` is synced first so it never shows up as a difference
419 oldregs.code = psxRegs.code; // don't care
// every register word that changed gets its new 4-byte value written
420 for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
421 if (allregs_p[i] != allregs_o[i]) {
423 fwrite(&allregs_p[i], 1, 4, f);
424 allregs_o[i] = allregs_p[i];
// same for changed event_cycles[] entries
428 for (i = 0; i < PSXINT_COUNT; i++) {
429 if (event_cycles[i] != event_cycles_o[i]) {
431 fwrite(&byte, 1, 1, f);
433 fwrite(&event_cycles[i], 1, 4, f);
434 event_cycles_o[i] = event_cycles[i];
// SAVE_IF_CHANGED keeps a per-name static copy (initially 0xbad0c0de) and
// writes one byte followed by the 4-byte value whenever the value differs
437 #define SAVE_IF_CHANGED(code_, name_) { \
438 static u32 old_##name_ = 0xbad0c0de; \
439 if (old_##name_ != name_) { \
441 fwrite(&byte, 1, 1, f); \
442 fwrite(&name_, 1, 4, f); \
443 old_##name_ = name_; \
446 SAVE_IF_CHANGED(0xfb, irq_test_cycle);
447 SAVE_IF_CHANGED(0xfc, handler_cycle);
448 SAVE_IF_CHANGED(0xfd, last_io_addr);
// the word currently stored at the last I/O address is tracked as well
449 io_data = memcheck_read(last_io_addr);
450 SAVE_IF_CHANGED(0xfe, io_data);
452 fwrite(&byte, 1, 1, f);
// at one specific cycle count, psxM (0x200000 bytes) and psxH (0x10000
// bytes) are dumped to hard-coded paths
455 if (psxRegs.cycle == 190230) {
456 dump_mem("/mnt/ntz/dev/pnd/tmp/psxram_i.dump", psxM, 0x200000);
457 dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs_i.dump", psxH, 0x10000);
/*
 * Printable names for the 32-bit words of psxRegisters that precede
 * intCycle, in word order; one slot per word.
 * NOTE(review): at least one short row between the visible rows is not
 * shown in this view, so the list below is not the complete initializer.
 */
465 static const char *regnames[offsetof(psxRegisters, intCycle) / 4] = {
// r0..r31
466 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
467 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
468 "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
469 "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
// C0_0..C0_31
471 "C0_0", "C0_1", "C0_2", "C0_3", "C0_4", "C0_5", "C0_6", "C0_7",
472 "C0_8", "C0_9", "C0_10", "C0_11", "C0_12", "C0_13", "C0_14", "C0_15",
473 "C0_16", "C0_17", "C0_18", "C0_19", "C0_20", "C0_21", "C0_22", "C0_23",
474 "C0_24", "C0_25", "C0_26", "C0_27", "C0_28", "C0_29", "C0_30", "C0_31",
// C2D0..C2D31
476 "C2D0", "C2D1", "C2D2", "C2D3", "C2D4", "C2D5", "C2D6", "C2D7",
477 "C2D8", "C2D9", "C2D10", "C2D11", "C2D12", "C2D13", "C2D14", "C2D15",
478 "C2D16", "C2D17", "C2D18", "C2D19", "C2D20", "C2D21", "C2D22", "C2D23",
479 "C2D24", "C2D25", "C2D26", "C2D27", "C2D28", "C2D29", "C2D30", "C2D31",
// C2C0..C2C31
481 "C2C0", "C2C1", "C2C2", "C2C3", "C2C4", "C2C5", "C2C6", "C2C7",
482 "C2C8", "C2C9", "C2C10", "C2C11", "C2C12", "C2C13", "C2C14", "C2C15",
483 "C2C16", "C2C17", "C2C18", "C2C19", "C2C20", "C2C21", "C2C22", "C2C23",
484 "C2C24", "C2C25", "C2C26", "C2C27", "C2C28", "C2C29", "C2C30", "C2C31",
// trailing individual fields
486 "PC", "code", "cycle", "interrupt",
/* next slot of miss_log[] to be written */
static int miss_log_i;
/* element count of miss_log[]; the mask form requires a power of two */
#define miss_log_len ARRAY_SIZE(miss_log)
#define miss_log_mask (miss_log_len - 1)
498 static void miss_log_add(int reg, u32 val, u32 val_expect, u32 pc, u32 cycle)
500 miss_log[miss_log_i].reg = reg;
501 miss_log[miss_log_i].val = val;
502 miss_log[miss_log_i].val_expect = val_expect;
503 miss_log[miss_log_i].pc = pc;
504 miss_log[miss_log_i].cycle = cycle;
505 miss_log_i = (miss_log_i + 1) & miss_log_mask;
510 void do_insn_cmp(void)
512 extern int last_count;
513 static psxRegisters rregs;
514 static u32 mem_addr, mem_val;
515 static u32 irq_test_cycle_intr;
516 static u32 handler_cycle_intr;
517 u32 *allregs_p = (void *)&psxRegs;
518 u32 *allregs_e = (void *)&rregs;
519 u32 badregs_mask = 0;
520 static u32 ppc, failcount;
521 static u32 badregs_mask_prev;
522 int i, ret, bad = 0, fatal = 0, which_event = -1;
527 f = fopen("tracelog", "rb");
530 if ((ret = fread(&code, 1, 1, f)) <= 0)
539 fread(&which_event, 1, 1, f);
540 fread(&ev_cycles, 1, 4, f);
543 fread(&irq_test_cycle_intr, 1, 4, f);
546 fread(&handler_cycle_intr, 1, 4, f);
549 fread(&mem_addr, 1, 4, f);
552 fread(&mem_val, 1, 4, f);
555 assert(code < offsetof(psxRegisters, intCycle) / 4);
556 fread(&allregs_e[code], 1, 4, f);
564 psxRegs.code = rregs.code; // don't care
565 psxRegs.cycle += last_count;
566 //psxRegs.cycle = rregs.cycle; // needs reload in _cmp
567 psxRegs.CP0.r[9] = rregs.CP0.r[9]; // Count
569 //if (psxRegs.cycle == 166172) breakme();
571 if (which_event >= 0 && event_cycles[which_event] != ev_cycles) {
572 printf("bad ev_cycles #%d: %u %u / %u\n", which_event,
573 event_cycles[which_event], ev_cycles, psxRegs.cycle);
577 if (irq_test_cycle > irq_test_cycle_intr) {
578 printf("bad irq_test_cycle: %u %u\n", irq_test_cycle, irq_test_cycle_intr);
582 if (handler_cycle != handler_cycle_intr) {
583 printf("bad handler_cycle: %u %u\n", handler_cycle, handler_cycle_intr);
587 if (mem_val != memcheck_read(mem_addr)) {
588 printf("bad mem @%08x: %08x %08x\n", mem_addr, memcheck_read(mem_addr), mem_val);
592 if (!fatal && !memcmp(&psxRegs, &rregs, offsetof(psxRegisters, intCycle))) {
597 for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
598 if (allregs_p[i] != allregs_e[i]) {
599 miss_log_add(i, allregs_p[i], allregs_e[i], psxRegs.pc, psxRegs.cycle);
604 badregs_mask |= 1u << i;
608 if (badregs_mask_prev & badregs_mask)
613 if (!fatal && psxRegs.pc == rregs.pc && bad < 6 && failcount < 24) {
614 static int last_mcycle;
615 if (last_mcycle != psxRegs.cycle >> 20) {
616 printf("%u\n", psxRegs.cycle);
617 last_mcycle = psxRegs.cycle >> 20;
622 for (i = 0; i < miss_log_len; i++, miss_log_i = (miss_log_i + 1) & miss_log_mask)
623 printf("bad %5s: %08x %08x, pc=%08x, cycle %u\n",
624 regnames[miss_log[miss_log_i].reg], miss_log[miss_log_i].val,
625 miss_log[miss_log_i].val_expect, miss_log[miss_log_i].pc, miss_log[miss_log_i].cycle);
626 printf("-- %d\n", bad);
627 for (i = 0; i < 8; i++)
628 printf("r%d=%08x r%2d=%08x r%2d=%08x r%2d=%08x\n", i, allregs_p[i],
629 i+8, allregs_p[i+8], i+16, allregs_p[i+16], i+24, allregs_p[i+24]);
630 printf("PC: %08x/%08x, cycle %u, next %u\n", psxRegs.pc, ppc, psxRegs.cycle, next_interupt);
631 //dump_mem("/tmp/psxram.dump", psxM, 0x200000);
632 //dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs.dump", psxH, 0x10000);
635 //psxRegs.cycle = rregs.cycle + 2; // sync timing
637 badregs_mask_prev = badregs_mask;