2 * (C) Gražvydas "notaz" Ignotas, 2010-2011
4 * This work is licensed under the terms of GNU GPL version 2 or later.
5 * See the COPYING file in the top-level directory.
12 #include "../psxhle.h"
13 #include "../psxinterpreter.h"
14 #include "../psxcounters.h"
15 #include "../psxevents.h"
16 #include "../psxbios.h"
17 #include "../r3000a.h"
18 #include "../gte_arm.h"
19 #include "../gte_neon.h"
20 #include "compiler_features.h"
21 #include "arm_features.h"
24 #if defined(NDRC_THREAD) && !defined(DRC_DISABLE) && !defined(LIGHTREC)
25 #include "../../frontend/libretro-rthreads.h"
26 #include "features/features_cpu.h"
27 #include "retro_timers.h"
30 #include <3ds_utils.h>
// Number of elements in the array `x` (total size divided by element size).
// NOTE(review): `x` is not parenthesized in `x[0]`, so pass a plain array name.
34 #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
37 //#define evprintf printf
40 static void ari64_thread_sync(void);
// Saves (mode != 0) or reads back dynarec block info through SaveFuncs.
// On-disk layout written below: 8-byte "ariblks" header, the size value,
// then `size` bytes of the addrs buffer.
// NOTE(review): several short lines (local declarations, braces, else/return)
// are not visible in this listing; comments describe the visible statements only.
42 void ndrc_freeze(void *f, int mode)
44 const char header_save[8] = "ariblks";
// staging buffer handed to new_dynarec_save_blocks()/new_dynarec_load_blocks()
45 uint32_t addrs[1024 * 4];
52 if (mode != 0) { // save
53 size = new_dynarec_save_blocks(addrs, sizeof(addrs));
// header, payload size, payload
57 SaveFuncs.write(f, header_save, sizeof(header_save));
58 SaveFuncs.write(f, &size, sizeof(size));
59 SaveFuncs.write(f, addrs, size);
// read path: the header must match header_save exactly
62 bytes = SaveFuncs.read(f, header, sizeof(header));
63 if (bytes != sizeof(header) || strcmp(header, header_save)) {
// no valid header: rewind by however many bytes were consumed
65 SaveFuncs.seek(f, -bytes, SEEK_CUR);
68 SaveFuncs.read(f, &size, sizeof(size));
// stored payload larger than the buffer: seek forward over the excess
// NOTE(review): presumably size is clamped on a line not shown here - confirm
71 if (size > sizeof(addrs)) {
72 bytes = size - sizeof(addrs);
73 SaveFuncs.seek(f, bytes, SEEK_CUR);
76 bytes = SaveFuncs.read(f, addrs, size);
// block info is handed to the dynarec only when the active CPU is not psxInt
80 if (psxCpu != &psxInt)
81 new_dynarec_load_blocks(addrs, size);
84 //printf("drc: %d block info entries %s\n", size/8, mode ? "saved" : "loaded");
// Public entry point that calls new_dynarec_clear_full().
// NOTE(review): a short line between the signature and this call appears to be
// missing from this listing, so another call may precede it.
87 void ndrc_clear_full(void)
90 new_dynarec_clear_full();
93 #if !defined(DRC_DISABLE) && !defined(LIGHTREC)
94 #include "linkage_offsets.h"
96 static void ari64_thread_init(void);
97 static int ari64_thread_check_range(unsigned int start, unsigned int end);
// Performs MTC0(regs, reg, val) and then calls gen_interupt() with the
// address of the CP0 register block of `regs`.
//   regs - CPU register state to modify
//   reg  - register number passed to MTC0
//   val  - value passed to MTC0
99 void pcsx_mtc0(psxRegisters *regs, u32 reg, u32 val)
101 evprintf("MTC0 %d #%x @%08x %u\n", reg, val, regs->pc, regs->cycle);
102 MTC0(regs, reg, val);
// the argument had been corrupted to a (R) sign followed by "s" ("&reg" decoded as an entity)
103 gen_interupt(&regs->CP0);
// Variant of pcsx_mtc0() whose visible body only logs and performs MTC0();
// no gen_interupt() call is visible here.
// NOTE(review): "_ds" presumably stands for "delay slot" - confirm.
106 void pcsx_mtc0_ds(psxRegisters *regs, u32 reg, u32 val)
108 evprintf("MTC0 %d #%x @%08x %u\n", reg, val, regs->pc, regs->cycle);
109 MTC0(regs, reg, val);
// GTE handler table with 64 slots; it has no initializer here and is filled
// from psxCP2[] in ari64_init().
113 void *gte_handlers[64];
// Table of the "_nf" handler variants, 8 entries per row; the trailing comment
// on each row is the hex index of its first entry. NULL marks slots without an
// _nf handler.
// NOTE(review): "_nf" presumably means "no flags" - confirm.
115 void *gte_handlers_nf[64] = {
116 NULL , gteRTPS_nf , NULL , NULL , NULL , NULL , gteNCLIP_nf, NULL , // 00
117 NULL , NULL , NULL , NULL , gteOP_nf , NULL , NULL , NULL , // 08
118 gteDPCS_nf, gteINTPL_nf, gteMVMVA_nf, gteNCDS_nf, gteCDP_nf, NULL , gteNCDT_nf , NULL , // 10
119 NULL , NULL , NULL , gteNCCS_nf, gteCC_nf , NULL , gteNCS_nf , NULL , // 18
120 gteNCT_nf , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20
121 gteSQR_nf , gteDCPL_nf , gteDPCT_nf , NULL , NULL , gteAVSZ3_nf, gteAVSZ4_nf, NULL , // 28
122 gteRTPT_nf, NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30
123 NULL , NULL , NULL , NULL , NULL , gteGPF_nf , gteGPL_nf , gteNCCT_nf, // 38
// Name strings laid out with the same 64-slot indexing as gte_handlers_nf:
// each non-NULL entry names the operation whose _nf handler sits at that index.
// Despite the identifier, the entries are operation names, not register names.
126 const char *gte_regnames[64] = {
127 NULL , "RTPS" , NULL , NULL , NULL , NULL , "NCLIP", NULL , // 00
128 NULL , NULL , NULL , NULL , "OP" , NULL , NULL , NULL , // 08
129 "DPCS", "INTPL", "MVMVA", "NCDS", "CDP", NULL , "NCDT" , NULL , // 10
130 NULL , NULL , NULL , "NCCS", "CC" , NULL , "NCS" , NULL , // 18
131 "NCT" , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20
132 "SQR" , "DCPL" , "DPCT" , NULL , NULL , "AVSZ3", "AVSZ4", NULL , // 28
133 "RTPT", NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30
134 NULL , NULL , NULL , NULL , NULL , "GPF" , "GPL" , "NCCT", // 38
// Helpers that OR several single-bit masks together.
// GCBITS3 combines three GCBIT() masks; GDBITSn combines n GDBIT() masks and is
// defined recursively as GDBITS(n-1) plus one more GDBIT().
// NOTE(review): GCBIT/GDBIT themselves are not visible in this listing.
141 #define GCBITS3(b0,b1,b2) \
142 (GCBIT(b0) | GCBIT(b1) | GCBIT(b2))
143 #define GDBITS2(b0,b1) \
144 (GDBIT(b0) | GDBIT(b1))
145 #define GDBITS3(b0,b1,b2) \
146 (GDBITS2(b0,b1) | GDBIT(b2))
147 #define GDBITS4(b0,b1,b2,b3) \
148 (GDBITS3(b0,b1,b2) | GDBIT(b3))
149 #define GDBITS5(b0,b1,b2,b3,b4) \
150 (GDBITS4(b0,b1,b2,b3) | GDBIT(b4))
151 #define GDBITS6(b0,b1,b2,b3,b4,b5) \
152 (GDBITS5(b0,b1,b2,b3,b4) | GDBIT(b5))
153 #define GDBITS7(b0,b1,b2,b3,b4,b5,b6) \
154 (GDBITS6(b0,b1,b2,b3,b4,b5) | GDBIT(b6))
155 #define GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) \
156 (GDBITS7(b0,b1,b2,b3,b4,b5,b6) | GDBIT(b7))
157 #define GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) \
158 (GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) | GDBIT(b8))
159 #define GDBITS10(b0,b1,b2,b3,b4,b5,b6,b7,b8,b9) \
160 (GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) | GDBIT(b9))
// Per-operation 64-bit register-read masks, indexed by the GTE_* constants.
// Each entry ORs GDBIT()/GCBIT() masks with an optional literal; every literal
// used here has only bits in the upper 32 bits set.
// NOTE(review): presumably the upper half tracks control registers and the
// lower half data registers - confirm against the GCBIT/GDBIT definitions.
162 const uint64_t gte_reg_reads[64] = {
163 [GTE_RTPS] = 0x1f0000ff00000000ll | GDBITS7(0,1,13,14,17,18,19),
164 [GTE_NCLIP] = GDBITS3(12,13,14),
165 [GTE_OP] = GCBITS3(0,2,4) | GDBITS3(9,10,11),
166 [GTE_DPCS] = GCBITS3(21,22,23) | GDBITS4(6,8,21,22),
167 [GTE_INTPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22),
168 [GTE_MVMVA] = 0x00ffffff00000000ll | GDBITS9(0,1,2,3,4,5,9,10,11), // XXX: maybe decode further?
169 [GTE_NCDS] = 0x00ffff0000000000ll | GDBITS6(0,1,6,8,21,22),
170 [GTE_CDP] = 0x00ffe00000000000ll | GDBITS7(6,8,9,10,11,21,22),
171 [GTE_NCDT] = 0x00ffff0000000000ll | GDBITS8(0,1,2,3,4,5,6,8),
172 [GTE_NCCS] = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22),
173 [GTE_CC] = 0x001fe00000000000ll | GDBITS6(6,9,10,11,21,22),
174 [GTE_NCS] = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22),
175 [GTE_NCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
176 [GTE_SQR] = GDBITS3(9,10,11),
177 [GTE_DCPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22),
178 [GTE_DPCT] = GCBITS3(21,22,23) | GDBITS4(8,20,21,22),
179 [GTE_AVSZ3] = GCBIT(29) | GDBITS3(17,18,19),
180 [GTE_AVSZ4] = GCBIT(30) | GDBITS4(16,17,18,19),
181 [GTE_RTPT] = 0x1f0000ff00000000ll | GDBITS7(0,1,2,3,4,5,19),
182 [GTE_GPF] = GDBITS7(6,8,9,10,11,21,22),
183 [GTE_GPL] = GDBITS10(6,8,9,10,11,21,22,25,26,27),
184 [GTE_NCCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
// Per-operation register-write masks, indexed by the GTE_* constants.
// Entries are built from GDBIT() masks only; the two literal entries
// (GTE_RTPS, GTE_RTPT) fit in the lower 32 bits.
187 // note: this excludes gteFLAG that is always written to
188 const uint64_t gte_reg_writes[64] = {
189 [GTE_RTPS] = 0x0f0f7f00ll,
190 [GTE_NCLIP] = GDBIT(24),
191 [GTE_OP] = GDBITS6(9,10,11,25,26,27),
192 [GTE_DPCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
193 [GTE_INTPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
194 [GTE_MVMVA] = GDBITS6(9,10,11,25,26,27),
195 [GTE_NCDS] = GDBITS9(9,10,11,20,21,22,25,26,27),
196 [GTE_CDP] = GDBITS9(9,10,11,20,21,22,25,26,27),
197 [GTE_NCDT] = GDBITS9(9,10,11,20,21,22,25,26,27),
198 [GTE_NCCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
199 [GTE_CC] = GDBITS9(9,10,11,20,21,22,25,26,27),
200 [GTE_NCS] = GDBITS9(9,10,11,20,21,22,25,26,27),
201 [GTE_NCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
202 [GTE_SQR] = GDBITS6(9,10,11,25,26,27),
203 [GTE_DCPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
204 [GTE_DPCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
205 [GTE_AVSZ3] = GDBITS2(7,24),
206 [GTE_AVSZ4] = GDBITS2(7,24),
207 [GTE_RTPT] = 0x0f0f7f00ll,
208 [GTE_GPF] = GDBITS9(9,10,11,20,21,22,25,26,27),
209 [GTE_GPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
210 [GTE_NCCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
// Reset handler: calls the memory reset, invalidates all dynarec pages, then
// calls the memory load-state hook, in that order.
// NOTE(review): short lines before the first call are not visible in this listing.
213 static void ari64_reset()
216 new_dyna_pcsx_mem_reset();
217 new_dynarec_invalidate_all_pages();
218 new_dyna_pcsx_mem_load_state();
221 // execute until predefined leave points
222 // (HLE softcall exit and BIOS fastboot end)
223 static void ari64_execute_until(psxRegisters *regs)
// regs is expected to live LO_psxRegs bytes into the dynarec-local area;
// step back to that area's base and check it is the global dynarec_local
225 void *drc_local = (char *)regs - LO_psxRegs;
227 assert(drc_local == dynarec_local);
228 evprintf("+exec %08x, %u->%u (%d)\n", regs->pc, regs->cycle,
229 regs->next_interupt, regs->next_interupt - regs->cycle);
// enter the dynarec
231 new_dyna_start(drc_local);
233 evprintf("-exec %08x, %u->%u (%d) stop %d \n", regs->pc, regs->cycle,
234 regs->next_interupt, regs->next_interupt - regs->cycle, regs->stop);
// Non-threaded main loop: until regs->stop becomes non-zero, schedule the next
// timeslice and run the dynarec for it.
237 static void ari64_execute(struct psxRegisters *regs)
239 while (!regs->stop) {
240 schedule_timeslice(regs);
241 ari64_execute_until(regs);
242 evprintf("drc left @%08x\n", regs->pc);
// Runs the dynarec once with next_interupt forced to one cycle past the
// current cycle count.
// NOTE(review): the statements guarded by the two EXEC_CALLER_BOOT checks are
// not visible in this listing.
246 static void ari64_execute_block(struct psxRegisters *regs, enum blockExecCaller caller)
248 if (caller == EXEC_CALLER_BOOT)
251 regs->next_interupt = regs->cycle + 1;
252 ari64_execute_until(regs);
254 if (caller == EXEC_CALLER_BOOT)
// Invalidates compiled code for a guest address range.
//   addr - start address
//   size - length in 32-bit words (converted to bytes below)
258 static void ari64_clear(u32 addr, u32 size)
260 u32 end = addr + size * 4; /* PCSX uses DMA units (words) */
262 evprintf("ari64_clear %08x %04x\n", addr, size * 4);
// neither the quick check nor the compile-thread check reports a hit
// NOTE(review): the guarded statement is not visible; presumably an early return
264 if (!new_dynarec_quick_check_range(addr, end) &&
265 !ari64_thread_check_range(addr, end))
269 new_dynarec_invalidate_range(addr, end);
// Reacts to state changed outside the dynarec.
// NOTE(review): the first branch (presumably testing ram_replaced) is not
// visible in this listing; only the other_cpu_emu_exec branch is shown.
272 static void ari64_on_ext_change(int ram_replaced, int other_cpu_emu_exec)
276 else if (other_cpu_emu_exec)
277 new_dyna_pcsx_mem_load_state();
// Notification handler for the recompiler CPU.
// NOTE(review): the switch statement, break lines and braces are not visible
// in this listing.
280 static void ari64_notify(enum R3000Anote note, void *data) {
// both cache notes share one call; the argument is 1 only for ISOLATED
283 case R3000ACPU_NOTIFY_CACHE_UNISOLATED:
284 case R3000ACPU_NOTIFY_CACHE_ISOLATED:
285 new_dyna_pcsx_mem_isolate(note == R3000ACPU_NOTIFY_CACHE_ISOLATED);
287 case R3000ACPU_NOTIFY_BEFORE_SAVE:
// after a load: ram_replaced is set when data is NULL; the note is also
// forwarded to the interpreter
289 case R3000ACPU_NOTIFY_AFTER_LOAD:
290 ari64_on_ext_change(data == NULL, 0);
291 psxInt.Notify(note, data);
// Applies Config settings to ndrc_g and clears compiled code when relevant
// settings changed.
// NOTE(review): declarations, the else line and trailing statements are not
// visible in this listing.
296 static void ari64_apply_config()
// mirror Config.DisableStalls into the NDHACK_NO_STALLS hack bit
303 if (Config.DisableStalls)
304 ndrc_g.hacks |= NDHACK_NO_STALLS;
306 ndrc_g.hacks &= ~NDHACK_NO_STALLS;
// non-zero when either thread-force bit differs from the previously applied hacks
308 thread_changed = ((ndrc_g.hacks | ndrc_g.hacks_pergame) ^ ndrc_g.hacks_old)
309 & (NDHACK_THREAD_FORCE | NDHACK_THREAD_FORCE_ON);
// cycle multiplier or combined hacks changed since the last apply
310 if (Config.cycle_multiplier != ndrc_g.cycle_multiplier_old
311 || (ndrc_g.hacks | ndrc_g.hacks_pergame) != ndrc_g.hacks_old)
313 new_dynarec_clear_full();
// On ARM builds, when ndrc_g.thread.dirty_start is non-zero, makes the dirty
// code range visible to instruction fetch and then resets the dirty range to 0.
// NOTE(review): the #else/#endif lines are not visible in this listing, so the
// exact branch structure below is partly inferred.
320 static void clear_local_cache(void)
322 #if defined(__arm__) || defined(__aarch64__)
323 if (ndrc_g.thread.dirty_start) {
324 // see "Ensuring the visibility of updates to instructions"
325 // in v7/v8 reference manuals (DDI0406, DDI0487 etc.)
326 #if defined(__aarch64__) || defined(HAVE_ARMV8)
327 // the actual clean/invalidate is broadcast to all cores,
328 // the manual only prescribes an isb
329 __asm__ volatile("isb");
330 //#elif defined(_3DS)
331 // ctr_invalidate_icache();
333 // while on v6 this is always required, on v7 it depends on
334 // "Multiprocessing Extensions" being present, but that is difficult
335 // to detect so do it always for now
336 new_dyna_clear_cache(ndrc_g.thread.dirty_start, ndrc_g.thread.dirty_end);
338 ndrc_g.thread.dirty_start = ndrc_g.thread.dirty_end = 0;
// ExecuteBlock for the mixed CPU: forwards to the interpreter's ExecuteBlock.
// NOTE(review): a short line before this call may be missing from this listing.
343 static void mixed_execute_block(struct psxRegisters *regs, enum blockExecCaller caller)
345 psxInt.ExecuteBlock(regs, caller);
// Clear for the mixed CPU: forwards the range to both the dynarec and the
// interpreter.
348 static void mixed_clear(u32 addr, u32 size)
350 ari64_clear(addr, size);
351 psxInt.Clear(addr, size);
// Notify for the mixed CPU: forwards the note to both the dynarec handler and
// the interpreter.
// NOTE(review): ari64_notify() itself calls psxInt.Notify() in its AFTER_LOAD
// case, so that note may reach the interpreter twice - confirm this is intended.
354 static void mixed_notify(enum R3000Anote note, void *data)
356 ari64_notify(note, data);
357 psxInt.Notify(note, data);
// CPU descriptor that ari64_execute_threaded_slow() installs as psxCpu.
// The visible slots (Init, Reset, Execute, ApplyConfig, Shutdown) are NULL.
// NOTE(review): the initializer lines between these two are not visible in
// this listing; presumably they hold the mixed_* callbacks - confirm.
360 static R3000Acpu psxMixedCpu = {
361 NULL /* Init */, NULL /* Reset */, NULL /* Execute */,
365 NULL /* ApplyConfig */, NULL /* Shutdown */
// Slow path of threaded execution: hands a compile request to the compiler
// thread and runs the interpreter (through psxMixedCpu) in the meantime.
// NOTE(review): the loop construct, break/else lines and the statement that
// restores psxCpu are not visible in this listing.
368 static noinline void ari64_execute_threaded_slow(struct psxRegisters *regs,
369 enum blockExecCaller block_caller)
// compiler thread is idle (busy_addr == ~0u): snapshot the GPRs, publish the
// current pc as the address to compile under the lock, then wake the thread
371 if (ndrc_g.thread.busy_addr == ~0u) {
372 memcpy(ndrc_smrv_regs, regs->GPR.r, sizeof(ndrc_smrv_regs));
373 slock_lock(ndrc_g.thread.lock);
374 ndrc_g.thread.busy_addr = regs->pc;
375 slock_unlock(ndrc_g.thread.lock);
376 scond_signal(ndrc_g.thread.cond);
379 //ari64_notify(R3000ACPU_NOTIFY_BEFORE_SAVE, NULL);
// switch the active CPU to the mixed CPU
380 psxInt.Notify(R3000ACPU_NOTIFY_AFTER_LOAD, NULL);
381 assert(psxCpu == &psxRec);
382 psxCpu = &psxMixedCpu;
385 mixed_execute_block(regs, block_caller);
// exit conditions (their bodies are not visible): compiler idle again, and
// caller-specific "ended" checks for HLE softcalls and BIOS boot
387 if (ndrc_g.thread.busy_addr == ~0u)
389 if (block_caller == EXEC_CALLER_HLE) {
390 if (!psxBiosSoftcallEnded())
394 else if (block_caller == EXEC_CALLER_BOOT) {
395 if (!psxExecuteBiosEnded())
// hand control back: notify the interpreter, then signal that the other CPU
// emulator has executed
404 psxInt.Notify(R3000ACPU_NOTIFY_BEFORE_SAVE, NULL);
405 //ari64_notify(R3000ACPU_NOTIFY_AFTER_LOAD, NULL);
406 ari64_on_ext_change(0, 1);
// One step of threaded execution: when the compiler thread is idle, looks up
// regs->pc in the hash table and starts the dynarec at the result; the slow
// path is also called from here.
// NOTE(review): the second argument line of the lookup, the test on `target`
// and the return/else lines are not visible in this listing.
409 static void ari64_execute_threaded_once(struct psxRegisters *regs,
410 enum blockExecCaller block_caller)
// base of the dynarec-local area that contains regs
412 void *drc_local = (char *)regs - LO_psxRegs;
// hash table pointer stored at offset LO_hash_table_ptr in that area
413 struct ht_entry *hash_table =
414 *(void **)((char *)drc_local + LO_hash_table_ptr);
417 if (likely(ndrc_g.thread.busy_addr == ~0u)) {
418 target = ndrc_get_addr_ht_param(hash_table, regs->pc,
422 new_dyna_start_at(drc_local, target);
426 ari64_execute_threaded_slow(regs, block_caller);
// Threaded main execution entry point.
// NOTE(review): the loop construct around the step below is not visible in
// this listing.
429 static void ari64_execute_threaded(struct psxRegisters *regs)
431 schedule_timeslice(regs);
434 ari64_execute_threaded_once(regs, EXEC_CALLER_OTHER);
// reschedule once cycle has reached or passed next_interupt (signed difference)
436 if ((s32)(regs->cycle - regs->next_interupt) >= 0)
437 schedule_timeslice(regs);
// Threaded counterpart of ari64_execute_block(): forces next_interupt to one
// cycle past the current cycle count and runs one threaded step.
// NOTE(review): the statements guarded by the EXEC_CALLER_BOOT checks and by
// the cpuInRecursion check are not visible in this listing.
441 static void ari64_execute_threaded_block(struct psxRegisters *regs,
442 enum blockExecCaller caller)
444 if (caller == EXEC_CALLER_BOOT)
447 regs->next_interupt = regs->cycle + 1;
449 ari64_execute_threaded_once(regs, caller);
450 if (regs->cpuInRecursion) {
451 // must sync since we are returning to compiled code
455 if (caller == EXEC_CALLER_BOOT)
// Synchronizes with the compiler thread.
// NOTE(review): the statements guarded by the two `if`s (and any loop) are not
// visible in this listing.
459 static void ari64_thread_sync(void)
// no lock has been created, or the compiler thread is idle
461 if (!ndrc_g.thread.lock || ndrc_g.thread.busy_addr == ~0u)
// taking and releasing the lock blocks while another thread holds it
464 slock_lock(ndrc_g.thread.lock);
465 slock_unlock(ndrc_g.thread.lock);
// NOTE(review): `~0` here vs `~0u` elsewhere in the file; the comparison
// presumably yields the same result after conversion to unsigned - confirm
466 if (ndrc_g.thread.busy_addr == ~0)
// Checks [start, end) against the address the compiler thread is working on.
// NOTE(review): most of the body, including every return statement, is not
// visible in this listing; the return convention is presumably non-zero on
// overlap, judging by the commented-out "hits" message - confirm.
472 static int ari64_thread_check_range(unsigned int start, unsigned int end)
474 u32 addr = ndrc_g.thread.busy_addr;
// the window of MAXBLOCK * 4 bytes starting at addr ends at or before start
483 if (addr + MAXBLOCK * 4 <= start)
486 //SysPrintf("%x hits %x-%x\n", addr, start, end);
// Compiler thread entry point. Takes ndrc_g.thread.lock and loops until
// ndrc_g.thread.exit is set, compiling the address published in busy_addr.
// NOTE(review): braces, the condition around scond_wait() and break/continue
// lines are not visible in this listing.
490 static void ari64_compile_thread(void *unused)
// hash table pointer stored at offset LO_hash_table_ptr in dynarec_local
492 struct ht_entry *hash_table =
493 *(void **)((char *)dynarec_local + LO_hash_table_ptr);
497 slock_lock(ndrc_g.thread.lock);
498 while (!ndrc_g.thread.exit)
// busy_addr is read through a volatile-qualified pointer each time
500 addr = *(volatile unsigned int *)&ndrc_g.thread.busy_addr;
502 scond_wait(ndrc_g.thread.cond, ndrc_g.thread.lock);
503 addr = *(volatile unsigned int *)&ndrc_g.thread.busy_addr;
// nothing to compile, or shutdown requested
504 if (addr == ~0u || ndrc_g.thread.exit)
507 target = ndrc_get_addr_ht_param(hash_table, addr,
508 ndrc_cm_compile_in_thread);
509 //printf("c %08x -> %p\n", addr, target);
// mark the compiler idle again
510 ndrc_g.thread.busy_addr = ~0u;
512 slock_unlock(ndrc_g.thread.lock);
// Stops the compiler thread and releases its synchronization objects.
// Every step is guarded by a NULL check on the object it uses.
516 static void ari64_thread_shutdown(void)
// restore the non-threaded execution entry points
518 psxRec.Execute = ari64_execute;
519 psxRec.ExecuteBlock = ari64_execute_block;
// set the exit flag, under the lock when one exists
521 if (ndrc_g.thread.lock)
522 slock_lock(ndrc_g.thread.lock);
523 ndrc_g.thread.exit = 1;
524 if (ndrc_g.thread.lock)
525 slock_unlock(ndrc_g.thread.lock);
// signal the condition so a waiting thread can observe the flag, then join it
526 if (ndrc_g.thread.cond)
527 scond_signal(ndrc_g.thread.cond);
528 if (ndrc_g.thread.handle) {
529 sthread_join(ndrc_g.thread.handle);
530 ndrc_g.thread.handle = NULL;
// free the condition and the lock, clearing the pointers
532 if (ndrc_g.thread.cond) {
533 scond_free(ndrc_g.thread.cond);
534 ndrc_g.thread.cond = NULL;
536 if (ndrc_g.thread.lock) {
537 slock_free(ndrc_g.thread.lock);
538 ndrc_g.thread.lock = NULL;
// leave the state at "compiler idle"
540 ndrc_g.thread.busy_addr = ~0u;
// Decides whether the compiler thread should run and starts or stops it to match.
// NOTE(review): declarations, else/return lines and the platform #ifdef lines
// are not visible in this listing.
543 static void ari64_thread_init(void)
// a force bit in the per-game hacks is tested first, then in the global hacks;
// otherwise the thread is enabled when more than one CPU core is reported
547 if (ndrc_g.hacks_pergame & NDHACK_THREAD_FORCE)
549 else if (ndrc_g.hacks & NDHACK_THREAD_FORCE)
550 enable = ndrc_g.hacks & NDHACK_THREAD_FORCE_ON;
552 u32 cpu_count = cpu_features_get_core_amount();
553 enable = cpu_count > 1;
555 // bad for old3ds, reportedly no improvement for new3ds
// thread presence already matches the request
560 if (!ndrc_g.thread.handle == !enable)
// start from a clean state
563 ari64_thread_shutdown();
564 ndrc_g.thread.exit = 0;
565 ndrc_g.thread.busy_addr = ~0u;
568 ndrc_g.thread.lock = slock_new();
569 ndrc_g.thread.cond = scond_new();
// create the thread only when both objects were allocated
571 if (ndrc_g.thread.lock && ndrc_g.thread.cond)
572 ndrc_g.thread.handle = pcsxr_sthread_create(ari64_compile_thread, PCSXRT_DRC);
// on success switch psxRec to the threaded entry points
573 if (ndrc_g.thread.handle) {
574 psxRec.Execute = ari64_execute_threaded;
575 psxRec.ExecuteBlock = ari64_execute_threaded_block;
578 // clean up potential incomplete init
579 ari64_thread_shutdown();
581 SysPrintf("compiler thread %sabled\n", ndrc_g.thread.handle ? "en" : "dis");
583 #else // if !NDRC_THREAD
// Stubs used when the compiler thread is not built in: init and shutdown do
// nothing, and the range check always returns 0.
584 static void ari64_thread_init(void) {}
585 static void ari64_thread_shutdown(void) {}
586 static int ari64_thread_check_range(unsigned int start, unsigned int end) { return 0; }
// Initializes the dynarec glue: memory handlers, GTE handler tables, helper
// pointers and the saved configuration state.
// NOTE(review): declarations, preprocessor #if/#else/#endif lines and the
// return statement are not visible in this listing, so which of the handler
// overrides below are compiled together cannot be told from here.
589 static int ari64_init()
// 64-byte aligned scratch area, exported via scratch_buf_ptr below
591 static u32 scratch_buf[8*8*2] __attribute__((aligned(64)));
595 new_dyna_pcsx_mem_init();
// copy every psxCP2 handler that is not gteNULL into gte_handlers
597 for (i = 0; i < ARRAY_SIZE(gte_handlers); i++)
598 if (psxCP2[i] != gteNULL)
599 gte_handlers[i] = psxCP2[i];
// ARM-specific replacements for selected slots
601 #if defined(__arm__) && !defined(DRC_DBG)
602 gte_handlers[0x06] = gteNCLIP_arm;
604 gte_handlers_nf[0x01] = gteRTPS_nf_arm;
605 gte_handlers_nf[0x30] = gteRTPT_nf_arm;
608 // compiler's _nf version is still a lot slower than neon
609 // _nf_arm RTPS is roughly the same, RTPT slower
610 gte_handlers[0x01] = gte_handlers_nf[0x01] = gteRTPS_neon;
611 gte_handlers[0x30] = gte_handlers_nf[0x30] = gteRTPT_neon;
// overwrite the whole _nf table with the regular handlers
615 memcpy(gte_handlers_nf, gte_handlers, sizeof(gte_handlers_nf));
618 zeromem_ptr = zero_mem;
619 scratch_buf_ptr = scratch_buf; // for gte_neon.S
// record the current settings as the "old" values, then apply the config
621 ndrc_g.cycle_multiplier_old = Config.cycle_multiplier;
622 ndrc_g.hacks_old = ndrc_g.hacks | ndrc_g.hacks_pergame;
623 ari64_apply_config();
// Shutdown handler: stops the compiler thread, then cleans up the dynarec and
// its memory handlers, in that order.
629 static void ari64_shutdown()
631 ari64_thread_shutdown();
632 new_dynarec_cleanup();
633 new_dyna_pcsx_mem_shutdown();
647 #else // if DRC_DISABLE
// Placeholders for the DRC_DISABLE build: a dummy ndrc_g plus empty
// definitions of the dynarec entry points; new_dynarec_save_blocks() returns 0.
649 struct ndrc_globals ndrc_g; // dummy
650 void new_dynarec_init() {}
651 void new_dyna_start(void *context) {}
652 void new_dynarec_cleanup() {}
653 void new_dynarec_clear_full() {}
654 void new_dynarec_invalidate_all_pages() {}
655 void new_dynarec_invalidate_range(unsigned int start, unsigned int end) {}
656 void new_dyna_pcsx_mem_init(void) {}
657 void new_dyna_pcsx_mem_reset(void) {}
658 void new_dyna_pcsx_mem_load_state(void) {}
659 void new_dyna_pcsx_mem_isolate(int enable) {}
660 void new_dyna_pcsx_mem_shutdown(void) {}
661 int new_dynarec_save_blocks(void *save, int size) { return 0; }
662 void new_dynarec_load_blocks(const void *save, int size) {}
664 #endif // DRC_DISABLE
// Empty fallback definition of ari64_thread_sync().
// NOTE(review): the preprocessor condition selecting it is not visible in this listing.
667 static void ari64_thread_sync(void) {}
// Debug helper: writes `size` bytes from `mem` to the file `fname`.
// NOTE(review): the condition before the second fopen() and the fclose() are
// not visible in this listing.
678 void dump_mem(const char *fname, void *mem, size_t size)
680 FILE *f1 = fopen(fname, "wb");
// second attempt using only the part of fname after the last '/'
// NOTE(review): strrchr() returns NULL when fname has no '/', and the result of
// this fopen() is not checked before fwrite() in the visible lines
682 f1 = fopen(strrchr(fname, '/') + 1, "wb");
683 fwrite(mem, 1, size, f1);
// Debug helper: reads a 32-bit word for address `a` from one of three buffers,
// chosen by the upper 16 bits of `a`. The low two bits of `a` are masked off
// in every case.
687 static u32 memcheck_read(u32 a)
// 0x1f80xxxx -> psxH
689 if ((a >> 16) == 0x1f80)
691 return *(u32 *)(psxH + (a & 0xfffc));
// 0x1f00xxxx -> psxP
693 if ((a >> 16) == 0x1f00)
695 return *(u32 *)(psxP + (a & 0xfffc));
// anything else -> psxM, offset masked to 0x1ffffc (no range check is active)
697 // if ((a & ~0xe0600000) < 0x200000)
699 return *(u32 *)(psxM + (a & 0x1ffffc));
// Debug tracer: appends to the file "tracelog" every register, event cycle and
// tracked value that differs from the copy kept from the previous call.
// NOTE(review): declarations, the writes of the tag bytes and the lines that
// assign `byte` are not visible in this listing.
703 void do_insn_trace(void)
// previous-call snapshots used for change detection
705 static psxRegisters oldregs;
706 static u32 event_cycles_o[PSXINT_COUNT];
// view both register structs as flat arrays of 32-bit words
707 u32 *allregs_p = (void *)&psxRegs;
708 u32 *allregs_o = (void *)&oldregs;
713 //last_io_addr = 0x5e2c8;
715 f = fopen("tracelog", "wb");
718 oldregs.code = psxRegs.code; // don't care
// every word up to (not including) intCycle: write the ones that changed
719 for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
720 if (allregs_p[i] != allregs_o[i]) {
722 fwrite(&allregs_p[i], 1, 4, f);
723 allregs_o[i] = allregs_p[i];
// same for the per-event cycle values
727 for (i = 0; i < PSXINT_COUNT; i++) {
728 if (psxRegs.event_cycles[i] != event_cycles_o[i]) {
730 fwrite(&byte, 1, 1, f);
732 fwrite(&psxRegs.event_cycles[i], 1, 4, f);
733 event_cycles_o[i] = psxRegs.event_cycles[i];
736 #define SAVE_IF_CHANGED(code_, name_) { \
737 static u32 old_##name_ = 0xbad0c0de; \
738 if (old_##name_ != name_) { \
740 fwrite(&byte, 1, 1, f); \
741 fwrite(&name_, 1, 4, f); \
742 old_##name_ = name_; \
// extra tracked values, each written with its own code when changed
745 SAVE_IF_CHANGED(0xfb, irq_test_cycle);
746 SAVE_IF_CHANGED(0xfc, handler_cycle);
747 SAVE_IF_CHANGED(0xfd, last_io_addr);
748 io_data = memcheck_read(last_io_addr);
749 SAVE_IF_CHANGED(0xfe, io_data);
751 fwrite(&byte, 1, 1, f);
// hard-coded debugging aid: dump memory at one specific cycle count
754 if (psxRegs.cycle == 190230) {
755 dump_mem("/mnt/ntz/dev/pnd/tmp/psxram_i.dump", psxM, 0x200000);
756 dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs_i.dump", psxH, 0x10000);
// Display names for the 32-bit words of psxRegisters that precede intCycle,
// in word order; do_insn_cmp() indexes it with the word index.
// NOTE(review): some short initializer rows are not visible in this listing.
764 static const char *regnames[offsetof(psxRegisters, intCycle) / 4] = {
765 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
766 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
767 "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
768 "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
770 "C0_0", "C0_1", "C0_2", "C0_3", "C0_4", "C0_5", "C0_6", "C0_7",
771 "C0_8", "C0_9", "C0_10", "C0_11", "C0_12", "C0_13", "C0_14", "C0_15",
772 "C0_16", "C0_17", "C0_18", "C0_19", "C0_20", "C0_21", "C0_22", "C0_23",
773 "C0_24", "C0_25", "C0_26", "C0_27", "C0_28", "C0_29", "C0_30", "C0_31",
775 "C2D0", "C2D1", "C2D2", "C2D3", "C2D4", "C2D5", "C2D6", "C2D7",
776 "C2D8", "C2D9", "C2D10", "C2D11", "C2D12", "C2D13", "C2D14", "C2D15",
777 "C2D16", "C2D17", "C2D18", "C2D19", "C2D20", "C2D21", "C2D22", "C2D23",
778 "C2D24", "C2D25", "C2D26", "C2D27", "C2D28", "C2D29", "C2D30", "C2D31",
780 "C2C0", "C2C1", "C2C2", "C2C3", "C2C4", "C2C5", "C2C6", "C2C7",
781 "C2C8", "C2C9", "C2C10", "C2C11", "C2C12", "C2C13", "C2C14", "C2C15",
782 "C2C16", "C2C17", "C2C18", "C2C19", "C2C20", "C2C21", "C2C22", "C2C23",
783 "C2C24", "C2C25", "C2C26", "C2C27", "C2C28", "C2C29", "C2C30", "C2C31",
785 "PC", "code", "cycle", "interrupt",
// Write index into miss_log, plus its element count and index mask.
// miss_log_mask is len-1 and is used with `&` to wrap the index, so the
// array length must be a power of two.
// NOTE(review): the miss_log array definition is not visible in this listing.
793 static int miss_log_i;
794 #define miss_log_len (sizeof(miss_log)/sizeof(miss_log[0]))
795 #define miss_log_mask (miss_log_len-1)
// Records one register mismatch in the miss_log slot at miss_log_i, then
// advances the index, wrapping it with miss_log_mask.
//   reg        - register word index
//   val        - value observed
//   val_expect - value expected
//   pc, cycle  - program counter and cycle count to store with the entry
797 static void miss_log_add(int reg, u32 val, u32 val_expect, u32 pc, u32 cycle)
799 miss_log[miss_log_i].reg = reg;
800 miss_log[miss_log_i].val = val;
801 miss_log[miss_log_i].val_expect = val_expect;
802 miss_log[miss_log_i].pc = pc;
803 miss_log[miss_log_i].cycle = cycle;
804 miss_log_i = (miss_log_i + 1) & miss_log_mask;
// Debug comparer: reads records from the file "tracelog" into an expected
// register set (rregs) and compares the live psxRegs and tracked values
// against it, printing diagnostics on mismatch.
// NOTE(review): many short lines (switch/case labels, braces, goto/return,
// counters) are not visible in this listing, and the end of the function may
// lie beyond it; comments describe the visible statements only.
809 void do_insn_cmp(void)
811 extern int last_count;
// expected state, accumulated across calls from the trace records
812 static psxRegisters rregs;
813 static u32 mem_addr, mem_val;
814 static u32 irq_test_cycle_intr;
815 static u32 handler_cycle_intr;
// flat 32-bit word views of the live and expected register structs
816 u32 *allregs_p = (void *)&psxRegs;
817 u32 *allregs_e = (void *)&rregs;
818 u32 badregs_mask = 0;
819 static u32 ppc, failcount;
820 static u32 badregs_mask_prev;
821 int i, ret, bad = 0, fatal = 0, which_event = -1;
826 f = fopen("tracelog", "rb");
// read one code byte, then the payload that belongs to it
829 if ((ret = fread(&code, 1, 1, f)) <= 0)
838 fread(&which_event, 1, 1, f);
839 fread(&ev_cycles, 1, 4, f);
842 fread(&irq_test_cycle_intr, 1, 4, f);
845 fread(&handler_cycle_intr, 1, 4, f);
848 fread(&mem_addr, 1, 4, f);
851 fread(&mem_val, 1, 4, f);
// remaining codes are register word indices
854 assert(code < offsetof(psxRegisters, intCycle) / 4);
855 fread(&allregs_e[code], 1, 4, f);
863 psxRegs.code = rregs.code; // don't care
864 psxRegs.cycle += last_count;
865 //psxRegs.cycle = rregs.cycle; // needs reload in _cmp
866 psxRegs.CP0.r[9] = rregs.CP0.r[9]; // Count
868 //if (psxRegs.cycle == 166172) breakme();
// checks of the individually tracked values
870 if (which_event >= 0 && psxRegs.event_cycles[which_event] != ev_cycles) {
871 printf("bad ev_cycles #%d: %u %u / %u\n", which_event,
872 psxRegs.event_cycles[which_event], ev_cycles, psxRegs.cycle);
876 if (irq_test_cycle > irq_test_cycle_intr) {
877 printf("bad irq_test_cycle: %u %u\n", irq_test_cycle, irq_test_cycle_intr);
881 if (handler_cycle != handler_cycle_intr) {
882 printf("bad handler_cycle: %u %u\n", handler_cycle, handler_cycle_intr);
886 if (mem_val != memcheck_read(mem_addr)) {
887 printf("bad mem @%08x: %08x %08x\n", mem_addr, memcheck_read(mem_addr), mem_val);
// nothing fatal and every register word up to intCycle matches
891 if (!fatal && !memcmp(&psxRegs, &rregs, offsetof(psxRegisters, intCycle))) {
// otherwise log each differing word
// NOTE(review): `1u << i` is evaluated for the loop's i, which exceeds 31 for
// later words unless a guard on a non-visible line limits it - confirm
896 for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
897 if (allregs_p[i] != allregs_e[i]) {
898 miss_log_add(i, allregs_p[i], allregs_e[i], psxRegs.pc, psxRegs.cycle);
903 badregs_mask |= 1u << i;
907 if (badregs_mask_prev & badregs_mask)
// no fatal error, matching PC and low mismatch counts: print the cycle count
// whenever its value shifted right by 20 bits changes
912 if (!fatal && psxRegs.pc == rregs.pc && bad < 6 && failcount < 24) {
913 static int last_mcycle;
914 if (last_mcycle != psxRegs.cycle >> 20) {
915 printf("%u\n", psxRegs.cycle);
916 last_mcycle = psxRegs.cycle >> 20;
// print the whole miss log ring starting at the current index, then the registers
921 for (i = 0; i < miss_log_len; i++, miss_log_i = (miss_log_i + 1) & miss_log_mask)
922 printf("bad %5s: %08x %08x, pc=%08x, cycle %u\n",
923 regnames[miss_log[miss_log_i].reg], miss_log[miss_log_i].val,
924 miss_log[miss_log_i].val_expect, miss_log[miss_log_i].pc, miss_log[miss_log_i].cycle);
925 printf("-- %d\n", bad);
926 for (i = 0; i < 8; i++)
927 printf("r%d=%08x r%2d=%08x r%2d=%08x r%2d=%08x\n", i, allregs_p[i],
928 i+8, allregs_p[i+8], i+16, allregs_p[i+16], i+24, allregs_p[i+24]);
929 printf("PC: %08x/%08x, cycle %u, next %u\n", psxRegs.pc, ppc,
930 psxRegs.cycle, psxRegs.next_interupt);
931 //dump_mem("/tmp/psxram.dump", psxM, 0x200000);
932 //dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs.dump", psxH, 0x10000);
935 //psxRegs.cycle = rregs.cycle + 2; // sync timing
// keep this call's mismatch mask for the next call
937 badregs_mask_prev = badregs_mask;