2 * (C) Gražvydas "notaz" Ignotas, 2010-2011
4 * This work is licensed under the terms of GNU GPL version 2 or later.
5 * See the COPYING file in the top-level directory.
12 #include "../psxhle.h"
13 #include "../psxinterpreter.h"
14 #include "../psxcounters.h"
15 #include "../psxevents.h"
16 #include "../r3000a.h"
17 #include "../gte_arm.h"
18 #include "../gte_neon.h"
/* Number of elements in x; x must be a real array (not a pointer) for the sizeof division to be meaningful. */
22 #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
24 //#define evprintf printf
/*
 * Write val to coprocessor-0 register reg via MTC0(), then call
 * gen_interupt() on the CP0 state.  pending_exception is raised when the
 * masked PC equals 0x80000080 afterwards.
 */
27 void pcsx_mtc0(u32 reg, u32 val)
29 evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
30 MTC0(&psxRegs, reg, val);
31 gen_interupt(&psxRegs.CP0);
33 //if (psxRegs.CP0.n.Cause & psxRegs.CP0.n.SR & 0x0300) // possible sw irq
// The mask 0x803ffeff drops bit 8 and bits 22-30 of the PC before the compare,
// so e.g. 0x80000180 matches as well.
// NOTE(review): presumably this detects a jump to the exception vector - confirm.
34 if ((psxRegs.pc & 0x803ffeff) == 0x80000080)
35 pending_exception = 1;
/*
 * Write val to coprocessor-0 register reg via MTC0() only; unlike
 * pcsx_mtc0() no gen_interupt() call or PC check is visible here.
 * NOTE(review): the _ds suffix presumably stands for "delay slot" - confirm.
 */
38 void pcsx_mtc0_ds(u32 reg, u32 val)
40 evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
41 MTC0(&psxRegs, reg, val);
/*
 * Save or load dynarec block information through the SaveFuncs callbacks.
 *
 * f    - stream handle passed straight to SaveFuncs.read/write/seek
 * mode - non-zero saves, otherwise the load path is taken
 *
 * Layout written on save: the 8-byte header "ariblks", the size field,
 * then size bytes taken from addrs.
 * NOTE(review): several lines of this function (local declarations, the
 * branch between save and load, early exits) are not visible in this
 * excerpt; the comments below describe only the visible statements.
 */
44 void new_dyna_freeze(void *f, int mode)
// 7 characters plus the terminating NUL fill the 8 bytes exactly
46 const char header_save[8] = "ariblks";
47 uint32_t addrs[1024 * 4];
52 if (mode != 0) { // save
// new_dynarec_save_blocks() is given the buffer capacity in bytes
53 size = new_dynarec_save_blocks(addrs, sizeof(addrs));
57 SaveFuncs.write(f, header_save, sizeof(header_save));
58 SaveFuncs.write(f, &size, sizeof(size));
59 SaveFuncs.write(f, addrs, size);
62 new_dyna_pcsx_mem_load_state();
64 bytes = SaveFuncs.read(f, header, sizeof(header));
// NOTE(review): strcmp() relies on header being NUL-terminated within its
// buffer - confirm that holds for data read from an arbitrary save file.
65 if (bytes != sizeof(header) || strcmp(header, header_save)) {
// header missing or different: seek back over the bytes that were just read
67 SaveFuncs.seek(f, -bytes, SEEK_CUR);
70 SaveFuncs.read(f, &size, sizeof(size));
// stored data is larger than addrs: seek past the part that does not fit
73 if (size > sizeof(addrs)) {
74 bytes = size - sizeof(addrs);
75 SaveFuncs.seek(f, bytes, SEEK_CUR);
78 bytes = SaveFuncs.read(f, addrs, size);
// block info is only handed to the dynarec when the interpreter is not the active CPU
82 if (psxCpu != &psxInt)
83 new_dynarec_load_blocks(addrs, size);
86 //printf("drc: %d block info entries %s\n", size/8, mode ? "saved" : "loaded");
89 #if !defined(DRC_DISABLE) && !defined(LIGHTREC)
// GTE op handler table with 64 entries; ari64_init() fills it from psxCP2[].
92 void *gte_handlers[64];
// Alternative handler table using the *_nf handler variants; NULL marks an
// index with no handler.  The trailing "// NN" on each row is the hex index of
// the row's first entry.
// NOTE(review): "_nf" presumably means "no flags" (gteFLAG not computed) - confirm.
94 void *gte_handlers_nf[64] = {
95 NULL , gteRTPS_nf , NULL , NULL , NULL , NULL , gteNCLIP_nf, NULL , // 00
96 NULL , NULL , NULL , NULL , gteOP_nf , NULL , NULL , NULL , // 08
97 gteDPCS_nf, gteINTPL_nf, gteMVMVA_nf, gteNCDS_nf, gteCDP_nf, NULL , gteNCDT_nf , NULL , // 10
98 NULL , NULL , NULL , gteNCCS_nf, gteCC_nf , NULL , gteNCS_nf , NULL , // 18
99 gteNCT_nf , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20
100 gteSQR_nf , gteDCPL_nf , gteDPCT_nf , NULL , NULL , gteAVSZ3_nf, gteAVSZ4_nf, NULL , // 28
101 gteRTPT_nf, NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30
102 NULL , NULL , NULL , NULL , NULL , gteGPF_nf , gteGPL_nf , gteNCCT_nf, // 38
// Mnemonic string for each of the 64 GTE op indices, laid out like the handler
// table (NULL where no name is defined).  Despite the identifier, the entries
// are operation names such as "RTPS", not register names.
105 const char *gte_regnames[64] = {
106 NULL , "RTPS" , NULL , NULL , NULL , NULL , "NCLIP", NULL , // 00
107 NULL , NULL , NULL , NULL , "OP" , NULL , NULL , NULL , // 08
108 "DPCS", "INTPL", "MVMVA", "NCDS", "CDP", NULL , "NCDT" , NULL , // 10
109 NULL , NULL , NULL , "NCCS", "CC" , NULL , "NCS" , NULL , // 18
110 "NCT" , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20
111 "SQR" , "DCPL" , "DPCT" , NULL , NULL , "AVSZ3", "AVSZ4", NULL , // 28
112 "RTPT", NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30
113 NULL , NULL , NULL , NULL , NULL , "GPF" , "GPL" , "NCCT", // 38
/*
 * Convenience macros that OR together N single-register masks.  GCBITS3
 * combines three GCBIT() masks; GDBITS2..GDBITS10 combine 2..10 GDBIT()
 * masks, each built from the next smaller macro.  GCBIT/GDBIT themselves
 * are defined outside this excerpt.
 * NOTE(review): presumably GC = GTE control register and GD = GTE data
 * register - confirm against the GCBIT/GDBIT definitions.
 */
120 #define GCBITS3(b0,b1,b2) \
121 (GCBIT(b0) | GCBIT(b1) | GCBIT(b2))
122 #define GDBITS2(b0,b1) \
123 (GDBIT(b0) | GDBIT(b1))
124 #define GDBITS3(b0,b1,b2) \
125 (GDBITS2(b0,b1) | GDBIT(b2))
126 #define GDBITS4(b0,b1,b2,b3) \
127 (GDBITS3(b0,b1,b2) | GDBIT(b3))
128 #define GDBITS5(b0,b1,b2,b3,b4) \
129 (GDBITS4(b0,b1,b2,b3) | GDBIT(b4))
130 #define GDBITS6(b0,b1,b2,b3,b4,b5) \
131 (GDBITS5(b0,b1,b2,b3,b4) | GDBIT(b5))
132 #define GDBITS7(b0,b1,b2,b3,b4,b5,b6) \
133 (GDBITS6(b0,b1,b2,b3,b4,b5) | GDBIT(b6))
134 #define GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) \
135 (GDBITS7(b0,b1,b2,b3,b4,b5,b6) | GDBIT(b7))
136 #define GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) \
137 (GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) | GDBIT(b8))
138 #define GDBITS10(b0,b1,b2,b3,b4,b5,b6,b7,b8,b9) \
139 (GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) | GDBIT(b9))
// Per-operation 64-bit mask of the GTE registers an op reads, indexed by the
// GTE_* op constants; ops not listed stay zero.  Some entries combine a raw
// hex constant (bits in the upper 32) with GDBITS/GCBITS masks.
// NOTE(review): the raw constants presumably spell out runs of control-register
// bits that would be unwieldy as GCBIT lists - confirm against GCBIT.
141 const uint64_t gte_reg_reads[64] = {
142 [GTE_RTPS] = 0x1f0000ff00000000ll | GDBITS7(0,1,13,14,17,18,19),
143 [GTE_NCLIP] = GDBITS3(12,13,14),
144 [GTE_OP] = GCBITS3(0,2,4) | GDBITS3(9,10,11),
145 [GTE_DPCS] = GCBITS3(21,22,23) | GDBITS4(6,8,21,22),
146 [GTE_INTPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22),
147 [GTE_MVMVA] = 0x00ffffff00000000ll | GDBITS9(0,1,2,3,4,5,9,10,11), // XXX: maybe decode further?
148 [GTE_NCDS] = 0x00ffff0000000000ll | GDBITS6(0,1,6,8,21,22),
149 [GTE_CDP] = 0x00ffe00000000000ll | GDBITS7(6,8,9,10,11,21,22),
150 [GTE_NCDT] = 0x00ffff0000000000ll | GDBITS8(0,1,2,3,4,5,6,8),
151 [GTE_NCCS] = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22),
152 [GTE_CC] = 0x001fe00000000000ll | GDBITS6(6,9,10,11,21,22),
153 [GTE_NCS] = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22),
154 [GTE_NCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
155 [GTE_SQR] = GDBITS3(9,10,11),
156 [GTE_DCPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22),
157 [GTE_DPCT] = GCBITS3(21,22,23) | GDBITS4(8,20,21,22),
158 [GTE_AVSZ3] = GCBIT(29) | GDBITS3(17,18,19),
159 [GTE_AVSZ4] = GCBIT(30) | GDBITS4(16,17,18,19),
160 [GTE_RTPT] = 0x1f0000ff00000000ll | GDBITS7(0,1,2,3,4,5,19),
161 [GTE_GPF] = GDBITS7(6,8,9,10,11,21,22),
162 [GTE_GPL] = GDBITS10(6,8,9,10,11,21,22,25,26,27),
163 [GTE_NCCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
// Per-operation mask of the GTE registers an op writes, indexed by the GTE_*
// op constants; ops not listed stay zero.  RTPS and RTPT share the same raw
// mask (0x0f0f7f00), most colour/lighting ops share one GDBITS9 set.
166 // note: this excludes gteFLAG that is always written to
167 const uint64_t gte_reg_writes[64] = {
168 [GTE_RTPS] = 0x0f0f7f00ll,
169 [GTE_NCLIP] = GDBIT(24),
170 [GTE_OP] = GDBITS6(9,10,11,25,26,27),
171 [GTE_DPCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
172 [GTE_INTPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
173 [GTE_MVMVA] = GDBITS6(9,10,11,25,26,27),
174 [GTE_NCDS] = GDBITS9(9,10,11,20,21,22,25,26,27),
175 [GTE_CDP] = GDBITS9(9,10,11,20,21,22,25,26,27),
176 [GTE_NCDT] = GDBITS9(9,10,11,20,21,22,25,26,27),
177 [GTE_NCCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
178 [GTE_CC] = GDBITS9(9,10,11,20,21,22,25,26,27),
179 [GTE_NCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
180 [GTE_NCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
181 [GTE_SQR] = GDBITS6(9,10,11,25,26,27),
182 [GTE_DCPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
183 [GTE_DPCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
184 [GTE_AVSZ3] = GDBITS2(7,24),
185 [GTE_AVSZ4] = GDBITS2(7,24),
186 [GTE_RTPT] = 0x0f0f7f00ll,
187 [GTE_GPF] = GDBITS9(9,10,11,20,21,22,25,26,27),
188 [GTE_GPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
189 [GTE_NCCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
/*
 * One-time setup of the dynarec glue: initialises the memory layer, builds
 * the GTE handler tables and publishes the zero-memory and scratch buffers
 * through zeromem_ptr / scratch_buf_ptr.
 * NOTE(review): some preprocessor guards and short statements (including
 * the return) are not visible in this excerpt, so which of the handler
 * overrides below are active depends on build flags not shown here.
 */
192 static int ari64_init()
// static, 64-byte aligned scratch area; exposed below via scratch_buf_ptr
194 static u32 scratch_buf[8*8*2] __attribute__((aligned(64)));
198 new_dyna_pcsx_mem_init();
// copy every interpreter GTE handler that is not the gteNULL placeholder
200 for (i = 0; i < ARRAY_SIZE(gte_handlers); i++)
201 if (psxCP2[i] != gteNULL)
202 gte_handlers[i] = psxCP2[i];
// ARM builds without DRC_DBG replace selected handlers with assembly versions
204 #if defined(__arm__) && !defined(DRC_DBG)
205 gte_handlers[0x06] = gteNCLIP_arm;
207 gte_handlers_nf[0x01] = gteRTPS_nf_arm;
208 gte_handlers_nf[0x30] = gteRTPT_nf_arm;
211 // compiler's _nf version is still a lot slower than neon
212 // _nf_arm RTPS is roughly the same, RTPT slower
213 gte_handlers[0x01] = gte_handlers_nf[0x01] = gteRTPS_neon;
214 gte_handlers[0x30] = gte_handlers_nf[0x30] = gteRTPT_neon;
// overwrite the whole _nf table with the regular handlers
// NOTE(review): the guard this line sits under is not visible here - confirm.
218 memcpy(gte_handlers_nf, gte_handlers, sizeof(gte_handlers_nf));
221 zeromem_ptr = zero_mem;
222 scratch_buf_ptr = scratch_buf;
/*
 * Reset the dynarec side: reset the memory layer, invalidate all pages,
 * reload the memory state, and set pending_exception.
 */
227 static void ari64_reset()
229 new_dyna_pcsx_mem_reset();
230 new_dynarec_invalidate_all_pages();
231 new_dyna_pcsx_mem_load_state();
232 pending_exception = 1;
235 // execute until predefined leave points
236 // (HLE softcall exit and BIOS fastboot end)
// Runs new_dyna_start() once on dynarec_local; the evprintf calls before and
// after log the PC, current cycle, next_interupt and their difference.
237 static void ari64_execute_until()
239 evprintf("ari64_execute %08x, %u->%u (%d)\n", psxRegs.pc,
240 psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle);
242 new_dyna_start(dynarec_local);
244 evprintf("ari64_execute end %08x, %u->%u (%d)\n", psxRegs.pc,
245 psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle);
/*
 * Main execution entry: schedule a timeslice, run the dynarec via
 * ari64_execute_until() and log where it left.
 * NOTE(review): any loop around these statements is not visible in this
 * excerpt.
 */
248 static void ari64_execute()
251 schedule_timeslice();
252 ari64_execute_until();
253 evprintf("drc left @%08x\n", psxRegs.pc);
/*
 * Run the dynarec with next_interupt set one cycle past the current cycle.
 * caller identifies who requested the run; EXEC_CALLER_BOOT gets extra
 * handling before and after.
 * NOTE(review): the statements guarded by the two EXEC_CALLER_BOOT checks
 * are not visible in this excerpt.
 */
257 static void ari64_execute_block(enum blockExecCaller caller)
259 if (caller == EXEC_CALLER_BOOT)
262 next_interupt = psxRegs.cycle + 1;
263 ari64_execute_until();
265 if (caller == EXEC_CALLER_BOOT)
/*
 * Invalidate translated code for an address range.
 * addr - start address
 * size - length in 32-bit words (converted to bytes below)
 */
269 static void ari64_clear(u32 addr, u32 size)
271 size *= 4; /* PCSX uses DMA units (words) */
273 evprintf("ari64_clear %08x %04x\n", addr, size);
// the range end passed on is addr + size (in bytes)
275 new_dynarec_invalidate_range(addr, addr + size);
/*
 * CPU notification hook.
 * note - which event occurred
 * data - event payload, forwarded unchanged to psxInt.Notify()
 * NOTE(review): the switch header, break statements and part of the
 * save/load handling are not visible in this excerpt.
 */
278 static void ari64_notify(enum R3000Anote note, void *data) {
// both cache notes share one handler; the argument is 1 only for ISOLATED
281 case R3000ACPU_NOTIFY_CACHE_UNISOLATED:
282 case R3000ACPU_NOTIFY_CACHE_ISOLATED:
283 new_dyna_pcsx_mem_isolate(note == R3000ACPU_NOTIFY_CACHE_ISOLATED);
285 case R3000ACPU_NOTIFY_BEFORE_SAVE:
287 case R3000ACPU_NOTIFY_AFTER_LOAD:
// pass the notification on to the interpreter's handler
290 psxInt.Notify(note, data);
/*
 * Apply configuration to the dynarec: mirror Config.DisableStalls into the
 * NDHACK_NO_STALLS bit of new_dynarec_hacks and discard all translated code
 * when the cycle multiplier or the hack flags differ from the previously
 * recorded values.
 * NOTE(review): the else keyword and any other short lines are not visible
 * in this excerpt.
 */
295 static void ari64_apply_config()
299 if (Config.DisableStalls)
300 new_dynarec_hacks |= NDHACK_NO_STALLS;
302 new_dynarec_hacks &= ~NDHACK_NO_STALLS;
// compare against the *_old copies to detect a change
304 if (Config.cycle_multiplier != cycle_multiplier_old
305 || new_dynarec_hacks != new_dynarec_hacks_old)
307 new_dynarec_clear_full();
/* Tear down the dynarec, then the dynarec memory layer. */
311 static void ari64_shutdown()
313 new_dynarec_cleanup();
314 new_dyna_pcsx_mem_shutdown();
328 #else // if DRC_DISABLE
/*
 * Definitions for the "#else // if DRC_DISABLE" branch: the globals are
 * plain definitions and every function is an empty stub (the one non-void
 * function returns 0), so the file's other code has something to reference.
 * NOTE(review): presumably this keeps the file linkable when the real
 * dynarec is not built - confirm against the build configuration.
 */
330 unsigned int address;
331 int pending_exception, stop;
333 int new_dynarec_did_compile;
334 int cycle_multiplier_old;
335 int new_dynarec_hacks_pergame;
336 int new_dynarec_hacks_old;
337 int new_dynarec_hacks;
// 0x1000 bytes expressed as an array of 32-bit words
340 u32 zero_mem[0x1000/4];
342 void *scratch_buf_ptr;
343 void new_dynarec_init() {}
344 void new_dyna_start(void *context) {}
345 void new_dynarec_cleanup() {}
346 void new_dynarec_clear_full() {}
347 void new_dynarec_invalidate_all_pages() {}
348 void new_dynarec_invalidate_range(unsigned int start, unsigned int end) {}
349 void new_dyna_pcsx_mem_init(void) {}
350 void new_dyna_pcsx_mem_reset(void) {}
351 void new_dyna_pcsx_mem_load_state(void) {}
352 void new_dyna_pcsx_mem_isolate(int enable) {}
353 void new_dyna_pcsx_mem_shutdown(void) {}
// reports zero bytes saved
354 int new_dynarec_save_blocks(void *save, int size) { return 0; }
355 void new_dynarec_load_blocks(const void *save, int size) {}
/*
 * Debug helper: write size bytes starting at mem to the file fname.
 *
 * fname - destination path; opened in binary write mode
 * mem   - start of the data to dump
 * size  - number of bytes to write
 *
 * If fname cannot be opened, the part after its last '/' is tried instead,
 * so the dump lands in the current directory.  When no file can be opened
 * the call does nothing.  The stream is always closed before returning.
 */
void dump_mem(const char *fname, void *mem, size_t size)
{
	FILE *f1 = fopen(fname, "wb");

	if (f1 == NULL) {
		/* strrchr() yields NULL when fname has no directory part */
		const char *slash = strrchr(fname, '/');

		if (slash != NULL)
			f1 = fopen(slash + 1, "wb");
	}
	if (f1 == NULL)
		return;

	fwrite(mem, 1, size, f1);
	fclose(f1);
}
/*
 * Return the 32-bit word stored at emulated address a, taken from one of
 * three host buffers selected by the upper 16 address bits.  Every offset
 * mask ends in ...c, so the access is word-aligned.
 */
375 static u32 memcheck_read(u32 a)
// 0x1f80xxxx -> psxH, 64 KB window
377 if ((a >> 16) == 0x1f80)
379 return *(u32 *)(psxH + (a & 0xfffc));
// 0x1f00xxxx -> psxP, 64 KB window
381 if ((a >> 16) == 0x1f00)
383 return *(u32 *)(psxP + (a & 0xfffc));
385 // if ((a & ~0xe0600000) < 0x200000)
// everything else -> psxM, offset wrapped to 2 MB
387 return *(u32 *)(psxM + (a & 0x1ffffc));
/*
 * Debug helper: append every change in CPU state since the previous call to
 * the binary file "tracelog" (the same file name do_insn_cmp() opens for
 * reading).  Previous values are kept in function-local statics.
 * NOTE(review): the lines that write the per-record tag bytes and guard the
 * fopen are not visible in this excerpt.
 */
391 void do_insn_trace(void)
393 static psxRegisters oldregs;
394 static u32 event_cycles_o[PSXINT_COUNT];
// both register structs are walked as flat arrays of 32-bit words
395 u32 *allregs_p = (void *)&psxRegs;
396 u32 *allregs_o = (void *)&oldregs;
401 //last_io_addr = 0x5e2c8;
403 f = fopen("tracelog", "wb");
406 oldregs.code = psxRegs.code; // don't care
// every word located before the intCycle member: write the ones that changed
407 for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
408 if (allregs_p[i] != allregs_o[i]) {
410 fwrite(&allregs_p[i], 1, 4, f);
411 allregs_o[i] = allregs_p[i];
// same change detection for the scheduled event cycles
415 for (i = 0; i < PSXINT_COUNT; i++) {
416 if (event_cycles[i] != event_cycles_o[i]) {
418 fwrite(&byte, 1, 1, f);
420 fwrite(&event_cycles[i], 1, 4, f);
421 event_cycles_o[i] = event_cycles[i];
/* SAVE_IF_CHANGED(code_, name_): when name_ differs from its cached static copy, write one byte followed by the 4-byte value and update the copy. */
424 #define SAVE_IF_CHANGED(code_, name_) { \
425 static u32 old_##name_ = 0xbad0c0de; \
426 if (old_##name_ != name_) { \
428 fwrite(&byte, 1, 1, f); \
429 fwrite(&name_, 1, 4, f); \
430 old_##name_ = name_; \
433 SAVE_IF_CHANGED(0xfb, irq_test_cycle);
434 SAVE_IF_CHANGED(0xfc, handler_cycle);
435 SAVE_IF_CHANGED(0xfd, last_io_addr);
// also track the memory word at the last I/O address
436 io_data = memcheck_read(last_io_addr);
437 SAVE_IF_CHANGED(0xfe, io_data);
439 fwrite(&byte, 1, 1, f);
// hard-coded debugging aid: dump psxM (2 MB) and psxH (64 KB) at one specific cycle
442 if (psxRegs.cycle == 190230) {
443 dump_mem("/mnt/ntz/dev/pnd/tmp/psxram_i.dump", psxM, 0x200000);
444 dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs_i.dump", psxH, 0x10000);
// Printable name for each 32-bit word of psxRegisters located before intCycle,
// indexed by word offset; do_insn_cmp() uses it when printing mismatches.
// The groups below are 32 names each ("r", "C0_", "C2D", "C2C"), followed by
// four single names.
// NOTE(review): the name order is assumed to mirror the psxRegisters member
// layout - confirm against the struct definition.
452 static const char *regnames[offsetof(psxRegisters, intCycle) / 4] = {
453 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
454 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
455 "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
456 "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
458 "C0_0", "C0_1", "C0_2", "C0_3", "C0_4", "C0_5", "C0_6", "C0_7",
459 "C0_8", "C0_9", "C0_10", "C0_11", "C0_12", "C0_13", "C0_14", "C0_15",
460 "C0_16", "C0_17", "C0_18", "C0_19", "C0_20", "C0_21", "C0_22", "C0_23",
461 "C0_24", "C0_25", "C0_26", "C0_27", "C0_28", "C0_29", "C0_30", "C0_31",
463 "C2D0", "C2D1", "C2D2", "C2D3", "C2D4", "C2D5", "C2D6", "C2D7",
464 "C2D8", "C2D9", "C2D10", "C2D11", "C2D12", "C2D13", "C2D14", "C2D15",
465 "C2D16", "C2D17", "C2D18", "C2D19", "C2D20", "C2D21", "C2D22", "C2D23",
466 "C2D24", "C2D25", "C2D26", "C2D27", "C2D28", "C2D29", "C2D30", "C2D31",
468 "C2C0", "C2C1", "C2C2", "C2C3", "C2C4", "C2C5", "C2C6", "C2C7",
469 "C2C8", "C2C9", "C2C10", "C2C11", "C2C12", "C2C13", "C2C14", "C2C15",
470 "C2C16", "C2C17", "C2C18", "C2C19", "C2C20", "C2C21", "C2C22", "C2C23",
471 "C2C24", "C2C25", "C2C26", "C2C27", "C2C28", "C2C29", "C2C30", "C2C31",
473 "PC", "code", "cycle", "interrupt",
// Index of the next slot to fill in miss_log (the miss_log array itself is
// declared outside this excerpt).
481 static int miss_log_i;
// miss_log_mask is len-1, so the wrap-around below is only correct when the
// array length is a power of two.
482 #define miss_log_len (sizeof(miss_log)/sizeof(miss_log[0]))
483 #define miss_log_mask (miss_log_len-1)
/*
 * Store one register mismatch in the current miss_log slot and advance the
 * index, wrapping around with miss_log_mask.
 * reg        - word index of the mismatching register
 * val        - value observed
 * val_expect - value expected
 * pc, cycle  - PC and cycle to record with the entry
 */
485 static void miss_log_add(int reg, u32 val, u32 val_expect, u32 pc, u32 cycle)
487 miss_log[miss_log_i].reg = reg;
488 miss_log[miss_log_i].val = val;
489 miss_log[miss_log_i].val_expect = val_expect;
490 miss_log[miss_log_i].pc = pc;
491 miss_log[miss_log_i].cycle = cycle;
492 miss_log_i = (miss_log_i + 1) & miss_log_mask;
/*
 * Debug helper: read records from the binary file "tracelog" (the file name
 * do_insn_trace() writes), rebuild the expected register state in rregs and
 * compare it with the live psxRegs, printing any differences.
 * NOTE(review): the case labels, several branch bodies and the exit paths of
 * this function are not visible in this excerpt; comments below describe
 * only the visible statements.
 */
497 void do_insn_cmp(void)
499 extern int last_count;
// expected state, carried over between calls
500 static psxRegisters rregs;
501 static u32 mem_addr, mem_val;
502 static u32 irq_test_cycle_intr;
503 static u32 handler_cycle_intr;
// live and expected registers viewed as flat arrays of 32-bit words
504 u32 *allregs_p = (void *)&psxRegs;
505 u32 *allregs_e = (void *)&rregs;
506 u32 badregs_mask = 0;
507 static u32 ppc, failcount;
508 static u32 badregs_mask_prev;
509 int i, ret, bad = 0, fatal = 0, which_event = -1;
514 f = fopen("tracelog", "rb");
// each record starts with a one-byte code
517 if ((ret = fread(&code, 1, 1, f)) <= 0)
// event record: one byte event index, then 4 bytes of cycles
// NOTE(review): only one byte of the int which_event is overwritten here, on
// top of its initial -1 - confirm the dropped lines reset it first.
526 fread(&which_event, 1, 1, f);
527 fread(&ev_cycles, 1, 4, f);
530 fread(&irq_test_cycle_intr, 1, 4, f);
533 fread(&handler_cycle_intr, 1, 4, f);
536 fread(&mem_addr, 1, 4, f);
539 fread(&mem_val, 1, 4, f);
// otherwise code is used as a register word index and must be in range
542 assert(code < offsetof(psxRegisters, intCycle) / 4);
543 fread(&allregs_e[code], 1, 4, f);
// fields made equal (or advanced) up front so they are not reported as mismatches
551 psxRegs.code = rregs.code; // don't care
552 psxRegs.cycle += last_count;
553 //psxRegs.cycle = rregs.cycle; // needs reload in _cmp
554 psxRegs.CP0.r[9] = rregs.CP0.r[9]; // Count
556 //if (psxRegs.cycle == 166172) breakme();
558 if (which_event >= 0 && event_cycles[which_event] != ev_cycles) {
559 printf("bad ev_cycles #%d: %u %u / %u\n", which_event,
560 event_cycles[which_event], ev_cycles, psxRegs.cycle);
// note: this check is ">" where the handler_cycle check below is "!="
564 if (irq_test_cycle > irq_test_cycle_intr) {
565 printf("bad irq_test_cycle: %u %u\n", irq_test_cycle, irq_test_cycle_intr);
569 if (handler_cycle != handler_cycle_intr) {
570 printf("bad handler_cycle: %u %u\n", handler_cycle, handler_cycle_intr);
574 if (mem_val != memcheck_read(mem_addr)) {
575 printf("bad mem @%08x: %08x %08x\n", mem_addr, memcheck_read(mem_addr), mem_val);
// whole-struct comparison up to intCycle: taken when nothing differs
579 if (!fatal && !memcmp(&psxRegs, &rregs, offsetof(psxRegisters, intCycle))) {
// otherwise log every differing word
584 for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
585 if (allregs_p[i] != allregs_e[i]) {
586 miss_log_add(i, allregs_p[i], allregs_e[i], psxRegs.pc, psxRegs.cycle);
// NOTE(review): 1u << i is undefined for i >= 32; confirm that the guard on
// this line (not visible here) keeps i below 32.
591 badregs_mask |= 1u << i;
595 if (badregs_mask_prev & badregs_mask)
// condition under which a mismatch is not fatal, PC still agrees and the counts stay small
600 if (!fatal && psxRegs.pc == rregs.pc && bad < 6 && failcount < 24) {
601 static int last_mcycle;
// print the cycle counter whenever cycle >> 20 changes
602 if (last_mcycle != psxRegs.cycle >> 20) {
603 printf("%u\n", psxRegs.cycle);
604 last_mcycle = psxRegs.cycle >> 20;
// print the whole miss log ring, starting at the current index
609 for (i = 0; i < miss_log_len; i++, miss_log_i = (miss_log_i + 1) & miss_log_mask)
610 printf("bad %5s: %08x %08x, pc=%08x, cycle %u\n",
611 regnames[miss_log[miss_log_i].reg], miss_log[miss_log_i].val,
612 miss_log[miss_log_i].val_expect, miss_log[miss_log_i].pc, miss_log[miss_log_i].cycle);
613 printf("-- %d\n", bad);
// print the first 32 register words, four per row
614 for (i = 0; i < 8; i++)
615 printf("r%d=%08x r%2d=%08x r%2d=%08x r%2d=%08x\n", i, allregs_p[i],
616 i+8, allregs_p[i+8], i+16, allregs_p[i+16], i+24, allregs_p[i+24]);
617 printf("PC: %08x/%08x, cycle %u, next %u\n", psxRegs.pc, ppc, psxRegs.cycle, next_interupt);
618 //dump_mem("/tmp/psxram.dump", psxM, 0x200000);
619 //dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs.dump", psxH, 0x10000);
622 //psxRegs.cycle = rregs.cycle + 2; // sync timing
// remember this call's mismatch mask for the comparison on the next call
624 badregs_mask_prev = badregs_mask;