2 * (C) Gražvydas "notaz" Ignotas, 2010-2011
4 * This work is licensed under the terms of GNU GPL version 2 or later.
5 * See the COPYING file in the top-level directory.
13 #include "../psxhle.h"
14 #include "../psxinterpreter.h"
15 #include "../r3000a.h"
16 #include "../gte_arm.h"
17 #include "../gte_neon.h"
/* Element count of the array x. Only meaningful for true arrays, not for
 * pointers or array-typed function parameters. The argument is parenthesised
 * before indexing so that any expression passed in binds as one unit. */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
23 //#define evprintf printf
// MTC0 write helper: logs the access through evprintf, forwards the write to
// MTC0() on psxRegs and then calls gen_interupt() on the CP0 registers.
// Afterwards pending_exception is set when the current PC, masked with
// 0x803ffeff, equals 0x80000080 (the mask ignores PC bit 8 and bits 22-30).
// NOTE(review): presumably this detects execution at the exception vector
// and its mirrors -- confirm.
//   reg - register number passed on to MTC0()
//   val - value passed on to MTC0()
26 void pcsx_mtc0(u32 reg, u32 val)
28 evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
29 MTC0(&psxRegs, reg, val);
30 gen_interupt(&psxRegs.CP0);
32 //if (psxRegs.CP0.n.Cause & psxRegs.CP0.n.SR & 0x0300) // possible sw irq
33 if ((psxRegs.pc & 0x803ffeff) == 0x80000080)
34 pending_exception = 1;
// Variant of the MTC0 write helper: logs the access and forwards it to MTC0()
// on psxRegs. The lines shown here make no gen_interupt() call and do not
// touch pending_exception.
// NOTE(review): the "_ds" suffix presumably stands for "delay slot" -- confirm.
37 void pcsx_mtc0_ds(u32 reg, u32 val)
39 evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
40 MTC0(&psxRegs, reg, val);
// Rebuilds the dynarec-side scheduling state from psxRegs.
43 static void new_dyna_restore(void)
// every event's absolute cycle is its start cycle plus its delay
46 for (i = 0; i < PSXINT_COUNT; i++)
47 event_cycles[i] = psxRegs.intCycle[i].sCycle + psxRegs.intCycle[i].cycle;
// the PSXINT_RCNT slot is overwritten from the psxNext* counter variables
49 event_cycles[PSXINT_RCNT] = psxNextsCounter + psxNextCounter;
// force the PSXINT_RCNT bit on, then drop any bits at or above PSXINT_COUNT
50 psxRegs.interrupt |= 1 << PSXINT_RCNT;
51 psxRegs.interrupt &= (1 << PSXINT_COUNT) - 1;
53 new_dyna_pcsx_mem_load_state();
// Saves or loads the dynarec block list through the SaveFuncs callbacks.
//   f    - stream handle handed to SaveFuncs.write/read/seek
//   mode - non-zero selects the save path; the read/seek calls further down
//          presumably form the load path (the branch header is not among the
//          lines shown here -- confirm)
// Data written on save: the 8-byte tag "ariblks" (7 characters plus the
// terminating NUL), then size, then size bytes taken from addrs.
56 void new_dyna_freeze(void *f, int mode)
58 const char header_save[8] = "ariblks";
59 uint32_t addrs[1024 * 4];
64 if (mode != 0) { // save
65 size = new_dynarec_save_blocks(addrs, sizeof(addrs));
69 SaveFuncs.write(f, header_save, sizeof(header_save));
70 SaveFuncs.write(f, &size, sizeof(size));
71 SaveFuncs.write(f, addrs, size);
// check the tag before trusting anything that follows it
76 bytes = SaveFuncs.read(f, header, sizeof(header));
// NOTE(review): strcmp() depends on the bytes read from the stream containing
// a NUL; a memcmp() over sizeof(header_save) would not -- confirm header's size.
77 if (bytes != sizeof(header) || strcmp(header, header_save)) {
// tag mismatch: seek back over the bytes that were just read
79 SaveFuncs.seek(f, -bytes, SEEK_CUR);
82 SaveFuncs.read(f, &size, sizeof(size));
// stored list is larger than addrs: seek forward by the excess byte count
85 if (size > sizeof(addrs)) {
86 bytes = size - sizeof(addrs);
87 SaveFuncs.seek(f, bytes, SEEK_CUR);
90 bytes = SaveFuncs.read(f, addrs, size);
// the block list is only handed to the dynarec when psxCpu is not psxInt
94 if (psxCpu != &psxInt)
95 new_dynarec_load_blocks(addrs, size);
98 //printf("drc: %d block info entries %s\n", size/8, mode ? "saved" : "loaded");
101 #if !defined(DRC_DISABLE) && !defined(LIGHTREC)
// GTE handler table with 64 slots; ari64_init() copies the psxCP2[] entries
// that differ from gteNULL into it.
104 void *gte_handlers[64];
// Handler table holding the *_nf GTE routines; slots without a routine are
// NULL. The trailing "// NN" comment on each row is the hexadecimal index of
// the row's first slot (8 slots per row).
// NOTE(review): "_nf" presumably means "no flags" -- confirm.
106 void *gte_handlers_nf[64] = {
107 NULL , gteRTPS_nf , NULL , NULL , NULL , NULL , gteNCLIP_nf, NULL , // 00
108 NULL , NULL , NULL , NULL , gteOP_nf , NULL , NULL , NULL , // 08
109 gteDPCS_nf, gteINTPL_nf, gteMVMVA_nf, gteNCDS_nf, gteCDP_nf, NULL , gteNCDT_nf , NULL , // 10
110 NULL , NULL , NULL , gteNCCS_nf, gteCC_nf , NULL , gteNCS_nf , NULL , // 18
111 gteNCT_nf , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20
112 gteSQR_nf , gteDCPL_nf , gteDPCT_nf , NULL , NULL , gteAVSZ3_nf, gteAVSZ4_nf, NULL , // 28
113 gteRTPT_nf, NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30
114 NULL , NULL , NULL , NULL , NULL , gteGPF_nf , gteGPL_nf , gteNCCT_nf, // 38
// Name strings laid out slot-for-slot like gte_handlers_nf: every non-NULL
// entry is the mnemonic of the operation whose handler occupies the same
// index, and the row comments give the index of the row's first slot.
117 const char *gte_regnames[64] = {
118 NULL , "RTPS" , NULL , NULL , NULL , NULL , "NCLIP", NULL , // 00
119 NULL , NULL , NULL , NULL , "OP" , NULL , NULL , NULL , // 08
120 "DPCS", "INTPL", "MVMVA", "NCDS", "CDP", NULL , "NCDT" , NULL , // 10
121 NULL , NULL , NULL , "NCCS", "CC" , NULL , "NCS" , NULL , // 18
122 "NCT" , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20
123 "SQR" , "DCPL" , "DPCT" , NULL , NULL , "AVSZ3", "AVSZ4", NULL , // 28
124 "RTPT", NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30
125 NULL , NULL , NULL , NULL , NULL , "GPF" , "GPL" , "NCCT", // 38
// Mask builders: GCBITS3 ORs three GCBIT() values, GDBITSn ORs n GDBIT()
// values. Each GDBITSn is defined as GDBITS(n-1) of the first n-1 arguments
// ORed with GDBIT() of the last one. GCBIT()/GDBIT() themselves are defined
// elsewhere.
132 #define GCBITS3(b0,b1,b2) \
133 (GCBIT(b0) | GCBIT(b1) | GCBIT(b2))
134 #define GDBITS2(b0,b1) \
135 (GDBIT(b0) | GDBIT(b1))
136 #define GDBITS3(b0,b1,b2) \
137 (GDBITS2(b0,b1) | GDBIT(b2))
138 #define GDBITS4(b0,b1,b2,b3) \
139 (GDBITS3(b0,b1,b2) | GDBIT(b3))
140 #define GDBITS5(b0,b1,b2,b3,b4) \
141 (GDBITS4(b0,b1,b2,b3) | GDBIT(b4))
142 #define GDBITS6(b0,b1,b2,b3,b4,b5) \
143 (GDBITS5(b0,b1,b2,b3,b4) | GDBIT(b5))
144 #define GDBITS7(b0,b1,b2,b3,b4,b5,b6) \
145 (GDBITS6(b0,b1,b2,b3,b4,b5) | GDBIT(b6))
146 #define GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) \
147 (GDBITS7(b0,b1,b2,b3,b4,b5,b6) | GDBIT(b7))
148 #define GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) \
149 (GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) | GDBIT(b8))
150 #define GDBITS10(b0,b1,b2,b3,b4,b5,b6,b7,b8,b9) \
151 (GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) | GDBIT(b9))
// Per-operation 64-bit masks, indexed by the GTE_* operation constants.
// Entries are built from GDBIT()/GCBIT() bits; several also OR in a literal
// constant whose low 32 bits are zero.
// NOTE(review): going by the table name these presumably mark the GTE
// registers each operation reads, with the literal high words standing in for
// GCBIT()-style bits -- confirm against the GCBIT/GDBIT definitions.
153 const uint64_t gte_reg_reads[64] = {
154 [GTE_RTPS] = 0x1f0000ff00000000ll | GDBITS7(0,1,13,14,17,18,19),
155 [GTE_NCLIP] = GDBITS3(12,13,14),
156 [GTE_OP] = GCBITS3(0,2,4) | GDBITS3(9,10,11),
157 [GTE_DPCS] = GCBITS3(21,22,23) | GDBITS4(6,8,21,22),
158 [GTE_INTPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22),
159 [GTE_MVMVA] = 0x00ffffff00000000ll | GDBITS9(0,1,2,3,4,5,9,10,11), // XXX: maybe decode further?
160 [GTE_NCDS] = 0x00ffff0000000000ll | GDBITS6(0,1,6,8,21,22),
161 [GTE_CDP] = 0x00ffe00000000000ll | GDBITS7(6,8,9,10,11,21,22),
162 [GTE_NCDT] = 0x00ffff0000000000ll | GDBITS8(0,1,2,3,4,5,6,8),
163 [GTE_NCCS] = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22),
164 [GTE_CC] = 0x001fe00000000000ll | GDBITS6(6,9,10,11,21,22),
165 [GTE_NCS] = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22),
166 [GTE_NCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
167 [GTE_SQR] = GDBITS3(9,10,11),
168 [GTE_DCPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22),
169 [GTE_DPCT] = GCBITS3(21,22,23) | GDBITS4(8,20,21,22),
170 [GTE_AVSZ3] = GCBIT(29) | GDBITS3(17,18,19),
171 [GTE_AVSZ4] = GCBIT(30) | GDBITS4(16,17,18,19),
172 [GTE_RTPT] = 0x1f0000ff00000000ll | GDBITS7(0,1,2,3,4,5,19),
173 [GTE_GPF] = GDBITS7(6,8,9,10,11,21,22),
174 [GTE_GPL] = GDBITS10(6,8,9,10,11,21,22,25,26,27),
175 [GTE_NCCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
// Per-operation 64-bit masks, indexed by the GTE_* operation constants and
// built from GDBIT() bits; the RTPS and RTPT entries use a literal that only
// occupies the low 32 bits.
// NOTE(review): going by the table name these presumably mark the GTE data
// registers each operation writes -- confirm against the GDBIT definition.
178 // note: this excludes gteFLAG that is always written to
179 const uint64_t gte_reg_writes[64] = {
180 [GTE_RTPS] = 0x0f0f7f00ll,
181 [GTE_NCLIP] = GDBIT(24),
182 [GTE_OP] = GDBITS6(9,10,11,25,26,27),
183 [GTE_DPCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
184 [GTE_INTPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
185 [GTE_MVMVA] = GDBITS6(9,10,11,25,26,27),
186 [GTE_NCDS] = GDBITS9(9,10,11,20,21,22,25,26,27),
187 [GTE_CDP] = GDBITS9(9,10,11,20,21,22,25,26,27),
188 [GTE_NCDT] = GDBITS9(9,10,11,20,21,22,25,26,27),
189 [GTE_NCCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
190 [GTE_CC] = GDBITS9(9,10,11,20,21,22,25,26,27),
191 [GTE_NCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
192 [GTE_NCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
193 [GTE_SQR] = GDBITS6(9,10,11,25,26,27),
194 [GTE_DCPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
195 [GTE_DPCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
196 [GTE_AVSZ3] = GDBITS2(7,24),
197 [GTE_AVSZ4] = GDBITS2(7,24),
198 [GTE_RTPT] = 0x0f0f7f00ll,
199 [GTE_GPF] = GDBITS9(9,10,11,20,21,22,25,26,27),
200 [GTE_GPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
201 [GTE_NCCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
// Init entry of the ari64 dynarec glue: initialises the memory helper, fills
// the GTE handler tables and publishes the zero_mem / scratch buffers through
// zeromem_ptr and scratch_buf_ptr.
204 static int ari64_init()
// function-local static scratch area, 8*8*2 u32 words, 64-byte aligned
206 static u32 scratch_buf[8*8*2] __attribute__((aligned(64)));
210 new_dyna_pcsx_mem_init();
// take over every psxCP2[] handler that is not the gteNULL placeholder
212 for (i = 0; i < ARRAY_SIZE(gte_handlers); i++)
213 if (psxCP2[i] != gteNULL)
214 gte_handlers[i] = psxCP2[i];
// ARM builds without DRC_DBG substitute *_arm routines for some slots
216 #if defined(__arm__) && !defined(DRC_DBG)
217 gte_handlers[0x06] = gteNCLIP_arm;
219 gte_handlers_nf[0x01] = gteRTPS_nf_arm;
220 gte_handlers_nf[0x30] = gteRTPT_nf_arm;
// NOTE(review): the condition selecting the *_neon assignments below is not
// among the lines shown here -- presumably a NEON build check; confirm.
223 // compiler's _nf version is still a lot slower than neon
224 // _nf_arm RTPS is roughly the same, RTPT slower
225 gte_handlers[0x01] = gte_handlers_nf[0x01] = gteRTPS_neon;
226 gte_handlers[0x30] = gte_handlers_nf[0x30] = gteRTPT_neon;
// overwrites the whole _nf table with the regular handlers
// NOTE(review): the condition guarding this copy is not visible -- confirm.
230 memcpy(gte_handlers_nf, gte_handlers, sizeof(gte_handlers_nf));
233 zeromem_ptr = zero_mem;
234 scratch_buf_ptr = scratch_buf;
// Reset entry: resets the memory helper, invalidates all dynarec pages and
// sets pending_exception to 1.
239 static void ari64_reset()
241 new_dyna_pcsx_mem_reset();
242 new_dynarec_invalidate_all_pages();
244 pending_exception = 1;
247 // execute until predefined leave points
248 // (HLE softcall exit and BIOS fastboot end)
// Runs new_dyna_start(dynarec_local), logging PC, current cycle,
// next_interupt and their difference through evprintf before and after.
249 static void ari64_execute_until()
251 evprintf("ari64_execute %08x, %u->%u (%d)\n", psxRegs.pc,
252 psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle);
254 new_dyna_start(dynarec_local);
256 evprintf("ari64_execute end %08x, %u->%u (%d)\n", psxRegs.pc,
257 psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle);
// Execute entry: calls schedule_timeslice(), runs ari64_execute_until() and
// logs the PC at which the dynarec returned.
// NOTE(review): presumably these calls sit inside a loop whose header is not
// among the lines shown here -- confirm.
260 static void ari64_execute()
263 schedule_timeslice();
264 ari64_execute_until();
265 evprintf("drc left @%08x\n", psxRegs.pc);
// Block-execute entry: sets next_interupt to one cycle past the current cycle
// and then runs ari64_execute_until().
//   caller - EXEC_CALLER_BOOT is tested before and after the run; what each
//            test does is not visible in the lines shown here.
269 static void ari64_execute_block(enum blockExecCaller caller)
271 if (caller == EXEC_CALLER_BOOT)
274 next_interupt = psxRegs.cycle + 1;
275 ari64_execute_until();
277 if (caller == EXEC_CALLER_BOOT)
// Invalidates translated code for a memory range.
//   addr - start address
//   size - length in 32-bit words; converted to bytes before use
281 static void ari64_clear(u32 addr, u32 size)
283 size *= 4; /* PCSX uses DMA units (words) */
285 evprintf("ari64_clear %08x %04x\n", addr, size);
287 new_dynarec_invalidate_range(addr, addr + size);
// Notification hook, dispatching on note.
//   note - R3000ACPU_NOTIFY_* event
//   data - event payload, passed through to psxInt.Notify()
290 static void ari64_notify(enum R3000Anote note, void *data) {
// both cache events share one handler; the argument is 1 only for ISOLATED
293 case R3000ACPU_NOTIFY_CACHE_UNISOLATED:
294 case R3000ACPU_NOTIFY_CACHE_ISOLATED:
295 new_dyna_pcsx_mem_isolate(note == R3000ACPU_NOTIFY_CACHE_ISOLATED);
297 case R3000ACPU_NOTIFY_BEFORE_SAVE:
299 case R3000ACPU_NOTIFY_AFTER_LOAD:
// NOTE(review): which case(s) reach this call depends on break statements
// that are not among the lines shown here -- confirm.
301 psxInt.Notify(note, data);
// Applies Config settings to the dynarec.
306 static void ari64_apply_config()
// mirror Config.DisableStalls into the NDHACK_NO_STALLS bit
// NOTE(review): the clearing statement is presumably the else branch; the
// "else" line is not among the lines shown here -- confirm.
310 if (Config.DisableStalls)
311 new_dynarec_hacks |= NDHACK_NO_STALLS;
313 new_dynarec_hacks &= ~NDHACK_NO_STALLS;
// a changed cycle multiplier or hack mask triggers new_dynarec_clear_full()
315 if (Config.cycle_multiplier != cycle_multiplier_old
316 || new_dynarec_hacks != new_dynarec_hacks_old)
318 new_dynarec_clear_full();
// Shutdown entry: runs the dynarec cleanup, then the memory helper shutdown.
322 static void ari64_shutdown()
324 new_dynarec_cleanup();
325 new_dyna_pcsx_mem_shutdown();
339 #else // if DRC_DISABLE
// Definitions for the #else branch of the DRC_DISABLE / LIGHTREC check:
// globals the rest of the code references, followed by stubs with empty
// bodies for the new_dynarec_* / new_dyna_* entry points.
341 unsigned int address;
342 int pending_exception, stop;
344 int new_dynarec_did_compile;
345 int cycle_multiplier_old;
346 int new_dynarec_hacks_pergame;
347 int new_dynarec_hacks_old;
348 int new_dynarec_hacks;
// 0x1000 bytes, declared as u32 words
351 u32 zero_mem[0x1000/4];
353 void *scratch_buf_ptr;
354 void new_dynarec_init() {}
355 void new_dyna_start(void *context) {}
356 void new_dynarec_cleanup() {}
357 void new_dynarec_clear_full() {}
358 void new_dynarec_invalidate_all_pages() {}
359 void new_dynarec_invalidate_range(unsigned int start, unsigned int end) {}
360 void new_dyna_pcsx_mem_init(void) {}
361 void new_dyna_pcsx_mem_reset(void) {}
362 void new_dyna_pcsx_mem_load_state(void) {}
363 void new_dyna_pcsx_mem_isolate(int enable) {}
364 void new_dyna_pcsx_mem_shutdown(void) {}
// the stub saves nothing and reports 0 bytes
365 int new_dynarec_save_blocks(void *save, int size) { return 0; }
366 void new_dynarec_load_blocks(const void *save, int size) {}
// Debug helper: writes size bytes starting at mem to the file fname.
//   fname - output path, opened in "wb" mode
//   mem   - start of the data to write
//   size  - number of bytes to write
377 void dump_mem(const char *fname, void *mem, size_t size)
379 FILE *f1 = fopen(fname, "wb");
// second attempt using only the part of fname after its last '/'
// NOTE(review): the condition guarding this retry is not among the lines
// shown here. strrchr() returns NULL when fname contains no '/', and no NULL
// check on f1 is visible before fwrite() -- confirm both are handled.
381 f1 = fopen(strrchr(fname, '/') + 1, "wb");
382 fwrite(mem, 1, size, f1);
// Reads the 32-bit word at emulated address a; the low two address bits are
// masked off in every branch.
//   a >> 16 == 0x1f80 -> psxH, offset a & 0xfffc
//   a >> 16 == 0x1f00 -> psxP, offset a & 0xfffc
//   anything else     -> psxM, offset a & 0x1ffffc
386 static u32 memcheck_read(u32 a)
388 if ((a >> 16) == 0x1f80)
390 return *(u32 *)(psxH + (a & 0xfffc));
392 if ((a >> 16) == 0x1f00)
394 return *(u32 *)(psxP + (a & 0xfffc));
396 // if ((a & ~0xe0600000) < 0x200000)
398 return *(u32 *)(psxM + (a & 0x1ffffc));
// Trace writer: appends the state that changed since the previous call to
// the file "tracelog". psxRegs is treated as an array of 32-bit words up to
// (not including) the intCycle member and compared against the static
// snapshot oldregs; event_cycles[] is compared against event_cycles_o[].
402 void do_insn_trace(void)
404 static psxRegisters oldregs;
405 static u32 event_cycles_o[PSXINT_COUNT];
406 u32 *allregs_p = (void *)&psxRegs;
407 u32 *allregs_o = (void *)&oldregs;
412 //last_io_addr = 0x5e2c8;
414 f = fopen("tracelog", "wb");
// copying code into the snapshot keeps it from ever registering as changed
417 oldregs.code = psxRegs.code; // don't care
// emit every register word that differs and refresh the snapshot
418 for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
419 if (allregs_p[i] != allregs_o[i]) {
421 fwrite(&allregs_p[i], 1, 4, f);
422 allregs_o[i] = allregs_p[i];
// same for the per-event cycle table; each record is a byte written from
// the variable byte followed by the 4-byte value
426 for (i = 0; i < PSXINT_COUNT; i++) {
427 if (event_cycles[i] != event_cycles_o[i]) {
429 fwrite(&byte, 1, 1, f);
431 fwrite(&event_cycles[i], 1, 4, f);
432 event_cycles_o[i] = event_cycles[i];
// SAVE_IF_CHANGED keeps a static copy of name_ (initialised to 0xbad0c0de)
// and writes a byte plus the 4-byte value whenever name_ differs from it.
// NOTE(review): code_ is presumably stored into byte on a macro line that is
// not among the lines shown here -- confirm.
435 #define SAVE_IF_CHANGED(code_, name_) { \
436 static u32 old_##name_ = 0xbad0c0de; \
437 if (old_##name_ != name_) { \
439 fwrite(&byte, 1, 1, f); \
440 fwrite(&name_, 1, 4, f); \
441 old_##name_ = name_; \
444 SAVE_IF_CHANGED(0xfb, irq_test_cycle);
445 SAVE_IF_CHANGED(0xfc, handler_cycle);
446 SAVE_IF_CHANGED(0xfd, last_io_addr);
// io_data is the memory word currently found at last_io_addr
447 io_data = memcheck_read(last_io_addr);
448 SAVE_IF_CHANGED(0xfe, io_data);
450 fwrite(&byte, 1, 1, f);
// debugging aid: dump psxM and psxH to hard-coded paths at one specific cycle
453 if (psxRegs.cycle == 190230) {
454 dump_mem("/mnt/ntz/dev/pnd/tmp/psxram_i.dump", psxM, 0x200000);
455 dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs_i.dump", psxH, 0x10000);
// Display names for the 32-bit words of psxRegisters that precede intCycle,
// in word order; do_insn_cmp() indexes this table with the word index of a
// mismatching register when printing its miss log.
463 static const char *regnames[offsetof(psxRegisters, intCycle) / 4] = {
464 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
465 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
466 "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
467 "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
469 "C0_0", "C0_1", "C0_2", "C0_3", "C0_4", "C0_5", "C0_6", "C0_7",
470 "C0_8", "C0_9", "C0_10", "C0_11", "C0_12", "C0_13", "C0_14", "C0_15",
471 "C0_16", "C0_17", "C0_18", "C0_19", "C0_20", "C0_21", "C0_22", "C0_23",
472 "C0_24", "C0_25", "C0_26", "C0_27", "C0_28", "C0_29", "C0_30", "C0_31",
474 "C2D0", "C2D1", "C2D2", "C2D3", "C2D4", "C2D5", "C2D6", "C2D7",
475 "C2D8", "C2D9", "C2D10", "C2D11", "C2D12", "C2D13", "C2D14", "C2D15",
476 "C2D16", "C2D17", "C2D18", "C2D19", "C2D20", "C2D21", "C2D22", "C2D23",
477 "C2D24", "C2D25", "C2D26", "C2D27", "C2D28", "C2D29", "C2D30", "C2D31",
479 "C2C0", "C2C1", "C2C2", "C2C3", "C2C4", "C2C5", "C2C6", "C2C7",
480 "C2C8", "C2C9", "C2C10", "C2C11", "C2C12", "C2C13", "C2C14", "C2C15",
481 "C2C16", "C2C17", "C2C18", "C2C19", "C2C20", "C2C21", "C2C22", "C2C23",
482 "C2C24", "C2C25", "C2C26", "C2C27", "C2C28", "C2C29", "C2C30", "C2C31",
484 "PC", "code", "cycle", "interrupt",
// Write position inside miss_log[], plus the entry count of that array and
// the mask used to wrap the position.
// NOTE(review): wrapping with "& miss_log_mask" is only correct when the
// array length is a power of two; the array definition is not among the
// lines shown here -- confirm.
492 static int miss_log_i;
493 #define miss_log_len (sizeof(miss_log)/sizeof(miss_log[0]))
494 #define miss_log_mask (miss_log_len-1)
// Records one mismatch in the miss_log[] slot at miss_log_i, then advances
// miss_log_i, wrapping it with miss_log_mask.
//   reg        - index of the mismatching register word
//   val        - value observed
//   val_expect - value expected
//   pc, cycle  - PC and cycle count stored alongside the mismatch
496 static void miss_log_add(int reg, u32 val, u32 val_expect, u32 pc, u32 cycle)
498 miss_log[miss_log_i].reg = reg;
499 miss_log[miss_log_i].val = val;
500 miss_log[miss_log_i].val_expect = val_expect;
501 miss_log[miss_log_i].pc = pc;
502 miss_log[miss_log_i].cycle = cycle;
503 miss_log_i = (miss_log_i + 1) & miss_log_mask;
// Trace checker: reads records from the file "tracelog" into the expected
// state (rregs, ev_cycles, irq_test_cycle_intr, handler_cycle_intr,
// mem_addr/mem_val) and compares it with the live state (psxRegs,
// event_cycles[], irq_test_cycle, handler_cycle and the memory word at
// mem_addr). Mismatches are printed, and register mismatches are also
// recorded through miss_log_add().
508 void do_insn_cmp(void)
510 extern int last_count;
// expected register image, filled word by word from the trace
511 static psxRegisters rregs;
512 static u32 mem_addr, mem_val;
513 static u32 irq_test_cycle_intr;
514 static u32 handler_cycle_intr;
515 u32 *allregs_p = (void *)&psxRegs;
516 u32 *allregs_e = (void *)&rregs;
517 u32 badregs_mask = 0;
518 static u32 ppc, failcount;
519 static u32 badregs_mask_prev;
520 int i, ret, bad = 0, fatal = 0, which_event = -1;
525 f = fopen("tracelog", "rb");
// every record starts with a one-byte code
528 if ((ret = fread(&code, 1, 1, f)) <= 0)
// NOTE(review): only one byte of the int which_event is overwritten here;
// confirm it is zeroed first on a line not shown, otherwise the bytes left
// over from its -1 initialiser remain.
537 fread(&which_event, 1, 1, f);
538 fread(&ev_cycles, 1, 4, f);
541 fread(&irq_test_cycle_intr, 1, 4, f);
544 fread(&handler_cycle_intr, 1, 4, f);
547 fread(&mem_addr, 1, 4, f);
550 fread(&mem_val, 1, 4, f);
// remaining codes are a word index into the expected register image
553 assert(code < offsetof(psxRegisters, intCycle) / 4);
554 fread(&allregs_e[code], 1, 4, f);
// fields copied from the expected image so they never count as mismatches
562 psxRegs.code = rregs.code; // don't care
563 psxRegs.cycle += last_count;
564 //psxRegs.cycle = rregs.cycle; // needs reload in _cmp
565 psxRegs.CP0.r[9] = rregs.CP0.r[9]; // Count
567 //if (psxRegs.cycle == 166172) breakme();
569 if (which_event >= 0 && event_cycles[which_event] != ev_cycles) {
570 printf("bad ev_cycles #%d: %u %u / %u\n", which_event,
571 event_cycles[which_event], ev_cycles, psxRegs.cycle);
// reported only when the live value is greater than the traced one
575 if (irq_test_cycle > irq_test_cycle_intr) {
576 printf("bad irq_test_cycle: %u %u\n", irq_test_cycle, irq_test_cycle_intr);
580 if (handler_cycle != handler_cycle_intr) {
581 printf("bad handler_cycle: %u %u\n", handler_cycle, handler_cycle_intr);
585 if (mem_val != memcheck_read(mem_addr)) {
586 printf("bad mem @%08x: %08x %08x\n", mem_addr, memcheck_read(mem_addr), mem_val);
// nothing fatal so far and all register words before intCycle match
590 if (!fatal && !memcmp(&psxRegs, &rregs, offsetof(psxRegisters, intCycle))) {
// otherwise log every mismatching register word
595 for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
596 if (allregs_p[i] != allregs_e[i]) {
597 miss_log_add(i, allregs_p[i], allregs_e[i], psxRegs.pc, psxRegs.cycle);
// NOTE(review): i runs past 31 in this loop while the mask is 32 bits wide;
// confirm a guard on i precedes this shift on a line not shown.
602 badregs_mask |= 1u << i;
606 if (badregs_mask_prev & badregs_mask)
// condition: not fatal, PC matches, fewer than 6 bad words in this call and
// failcount below 24
611 if (!fatal && psxRegs.pc == rregs.pc && bad < 6 && failcount < 24) {
// print the cycle counter whenever (cycle >> 20) changes
612 static int last_mcycle;
613 if (last_mcycle != psxRegs.cycle >> 20) {
614 printf("%u\n", psxRegs.cycle);
615 last_mcycle = psxRegs.cycle >> 20;
// print every miss_log slot, starting at the current write position, then
// the bad count and the first 32 register words
620 for (i = 0; i < miss_log_len; i++, miss_log_i = (miss_log_i + 1) & miss_log_mask)
621 printf("bad %5s: %08x %08x, pc=%08x, cycle %u\n",
622 regnames[miss_log[miss_log_i].reg], miss_log[miss_log_i].val,
623 miss_log[miss_log_i].val_expect, miss_log[miss_log_i].pc, miss_log[miss_log_i].cycle);
624 printf("-- %d\n", bad);
625 for (i = 0; i < 8; i++)
626 printf("r%d=%08x r%2d=%08x r%2d=%08x r%2d=%08x\n", i, allregs_p[i],
627 i+8, allregs_p[i+8], i+16, allregs_p[i+16], i+24, allregs_p[i+24]);
628 printf("PC: %08x/%08x, cycle %u, next %u\n", psxRegs.pc, ppc, psxRegs.cycle, next_interupt);
629 //dump_mem("/tmp/psxram.dump", psxM, 0x200000);
630 //dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs.dump", psxH, 0x10000);
633 //psxRegs.cycle = rregs.cycle + 2; // sync timing
// remember this call's mismatch mask for the comparison on the next call
635 badregs_mask_prev = badregs_mask;