X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=libpcsxcore%2Fnew_dynarec%2Femu_if.c;h=6c1b48c59c7c5c2d44e86488e1765e7d8ebc1fe4;hb=HEAD;hp=852d881e572296d9c5c6edfd3674da617e2ecd57;hpb=5b8c000f969c365d48418781d8f88f9c58d65611;p=pcsx_rearmed.git diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index 852d881e..cefadd21 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -10,137 +10,103 @@ #include "emu_if.h" #include "pcsxmem.h" #include "../psxhle.h" +#include "../psxinterpreter.h" +#include "../psxcounters.h" +#include "../psxevents.h" +#include "../psxbios.h" #include "../r3000a.h" -#include "../cdrom.h" -#include "../psxdma.h" -#include "../mdec.h" #include "../gte_arm.h" #include "../gte_neon.h" +#include "compiler_features.h" +#include "arm_features.h" #define FLAGLESS #include "../gte.h" +#if defined(NDRC_THREAD) && !defined(DRC_DISABLE) && !defined(LIGHTREC) +#include "../../frontend/libretro-rthreads.h" +#include "features/features_cpu.h" +#include "retro_timers.h" +#endif +#ifdef _3DS +#include <3ds_utils.h> +#endif +#ifndef ARRAY_SIZE #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) +#endif //#define evprintf printf #define evprintf(...) -char invalid_code[0x100000]; -u32 event_cycles[PSXINT_COUNT]; - -static void schedule_timeslice(void) -{ - u32 i, c = psxRegs.cycle; - u32 irqs = psxRegs.interrupt; - s32 min, dif; - - min = PSXCLK; - for (i = 0; irqs != 0; i++, irqs >>= 1) { - if (!(irqs & 1)) - continue; - dif = event_cycles[i] - c; - //evprintf(" ev %d\n", dif); - if (0 < dif && dif < min) - min = dif; - } - next_interupt = c + min; -} - -typedef void (irq_func)(); - -static irq_func * const irq_funcs[] = { - [PSXINT_SIO] = sioInterrupt, - [PSXINT_CDR] = cdrInterrupt, - [PSXINT_CDREAD] = cdrReadInterrupt, - [PSXINT_GPUDMA] = gpuInterrupt, - [PSXINT_MDECOUTDMA] = mdec1Interrupt, - [PSXINT_SPUDMA] = spuInterrupt, - [PSXINT_MDECINDMA] = mdec0Interrupt, - [PSXINT_GPUOTCDMA] = gpuotcInterrupt, - [PSXINT_CDRDMA] = cdrDmaInterrupt, - [PSXINT_CDRLID] = cdrLidSeekInterrupt, - [PSXINT_CDRPLAY] = cdrPlayInterrupt, - [PSXINT_RCNT] = psxRcntUpdate, -}; +static void ari64_thread_sync(void); -/* local dupe of psxBranchTest, using event_cycles */ -static void irq_test(void) +void ndrc_freeze(void *f, int mode) { - u32 irqs = psxRegs.interrupt; - u32 cycle = psxRegs.cycle; - u32 irq, irq_bits; - - // irq_funcs() may queue more irqs - psxRegs.interrupt = 0; - - for (irq = 0, irq_bits = irqs; irq_bits != 0; irq++, irq_bits >>= 1) { - if (!(irq_bits & 1)) - continue; - if ((s32)(cycle - event_cycles[irq]) >= 0) { - irqs &= ~(1 << irq); - irq_funcs[irq](); - } + const char header_save[8] = "ariblks"; + uint32_t addrs[1024 * 4]; + int32_t size = 0; + int bytes; + char header[8]; + + ari64_thread_sync(); + + if (mode != 0) { // save + size = new_dynarec_save_blocks(addrs, sizeof(addrs)); + if (size == 0) + return; + + SaveFuncs.write(f, header_save, sizeof(header_save)); + SaveFuncs.write(f, &size, sizeof(size)); + SaveFuncs.write(f, addrs, size); } - psxRegs.interrupt |= irqs; + else { + bytes = SaveFuncs.read(f, header, sizeof(header)); + if (bytes != sizeof(header) || strcmp(header, header_save)) { + if (bytes > 0) + SaveFuncs.seek(f, -bytes, SEEK_CUR); + return; + } + SaveFuncs.read(f, &size, sizeof(size)); + if (size <= 0) + return; + if (size > sizeof(addrs)) { + bytes = size - sizeof(addrs); + SaveFuncs.seek(f, bytes, SEEK_CUR); + size = sizeof(addrs); + } + bytes = SaveFuncs.read(f, addrs, size); + if (bytes != size) + return; - if ((psxHu32(0x1070) & psxHu32(0x1074)) && (Status & 0x401) == 0x401) { - psxException(0x400, 0); - pending_exception = 1; + if (psxCpu != &psxInt) + new_dynarec_load_blocks(addrs, size); } -} - -void gen_interupt() -{ - evprintf(" +ge %08x, %u->%u\n", psxRegs.pc, psxRegs.cycle, next_interupt); - - irq_test(); - //psxBranchTest(); - //pending_exception = 1; - schedule_timeslice(); - - evprintf(" -ge %08x, %u->%u (%d)\n", psxRegs.pc, psxRegs.cycle, - next_interupt, next_interupt - psxRegs.cycle); + //printf("drc: %d block info entries %s\n", size/8, mode ? "saved" : "loaded"); } -// from interpreter -extern void MTC0(int reg, u32 val); - -void pcsx_mtc0(u32 reg, u32 val) +void ndrc_clear_full(void) { - evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle); - MTC0(reg, val); - gen_interupt(); + ari64_thread_sync(); + new_dynarec_clear_full(); } -void pcsx_mtc0_ds(u32 reg, u32 val) -{ - evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle); - MTC0(reg, val); -} - -void new_dyna_save(void) -{ - psxRegs.interrupt &= ~(1 << PSXINT_RCNT); // old savestate compat +#if !defined(DRC_DISABLE) && !defined(LIGHTREC) +#include "linkage_offsets.h" - // psxRegs.intCycle is always maintained, no need to convert -} +static void ari64_thread_init(void); +static int ari64_thread_check_range(unsigned int start, unsigned int end); -void new_dyna_after_save(void) +void pcsx_mtc0(psxRegisters *regs, u32 reg, u32 val) { - psxRegs.interrupt |= 1 << PSXINT_RCNT; + evprintf("MTC0 %d #%x @%08x %u\n", reg, val, regs->pc, regs->cycle); + MTC0(regs, reg, val); + gen_interupt(®s->CP0); } -void new_dyna_restore(void) +void pcsx_mtc0_ds(psxRegisters *regs, u32 reg, u32 val) { - int i; - for (i = 0; i < PSXINT_COUNT; i++) - event_cycles[i] = psxRegs.intCycle[i].sCycle + psxRegs.intCycle[i].cycle; - - event_cycles[PSXINT_RCNT] = psxNextsCounter + psxNextCounter; - psxRegs.interrupt |= 1 << PSXINT_RCNT; - psxRegs.interrupt &= (1 << PSXINT_COUNT) - 1; - - new_dyna_pcsx_mem_load_state(); + evprintf("MTC0 %d #%x @%08x %u\n", reg, val, regs->pc, regs->cycle); + MTC0(regs, reg, val); } /* GTE stuff */ @@ -168,15 +134,6 @@ const char *gte_regnames[64] = { NULL , NULL , NULL , NULL , NULL , "GPF" , "GPL" , "NCCT", // 38 }; -/* from gte.txt.. not sure if this is any good. */ -const char gte_cycletab[64] = { - /* 1 2 3 4 5 6 7 8 9 a b c d e f */ - 0, 15, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 6, 0, 0, 0, - 8, 8, 8, 19, 13, 0, 44, 0, 0, 0, 0, 17, 11, 0, 14, 0, - 30, 0, 0, 0, 0, 0, 0, 0, 5, 8, 17, 0, 0, 5, 6, 0, - 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 39, -}; - #define GCBIT(x) \ (1ll << (32+x)) #define GDBIT(x) \ @@ -208,13 +165,13 @@ const uint64_t gte_reg_reads[64] = { [GTE_OP] = GCBITS3(0,2,4) | GDBITS3(9,10,11), [GTE_DPCS] = GCBITS3(21,22,23) | GDBITS4(6,8,21,22), [GTE_INTPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22), - [GTE_MVMVA] = 0x00ffffff00000000ll | GDBITS6(0,1,2,3,4,5), // XXX: maybe decode further? - [GTE_NCDS] = 0x00ffff0000000000ll | GDBITS5(0,1,6,21,22), - [GTE_CDP] = 0x00fff00000000000ll | GDBITS7(6,8,9,10,11,21,22), + [GTE_MVMVA] = 0x00ffffff00000000ll | GDBITS9(0,1,2,3,4,5,9,10,11), // XXX: maybe decode further? + [GTE_NCDS] = 0x00ffff0000000000ll | GDBITS6(0,1,6,8,21,22), + [GTE_CDP] = 0x00ffe00000000000ll | GDBITS7(6,8,9,10,11,21,22), [GTE_NCDT] = 0x00ffff0000000000ll | GDBITS8(0,1,2,3,4,5,6,8), - [GTE_NCCS] = 0x00ffff0000000000ll | GDBITS6(0,1,6,8,21,22), + [GTE_NCCS] = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22), [GTE_CC] = 0x001fe00000000000ll | GDBITS6(6,9,10,11,21,22), - [GTE_NCS] = 0x001fff0000000000ll | GDBITS4(0,1,21,22), + [GTE_NCS] = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22), [GTE_NCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6), [GTE_SQR] = GDBITS3(9,10,11), [GTE_DCPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22), @@ -253,154 +210,472 @@ const uint64_t gte_reg_writes[64] = { [GTE_NCCT] = GDBITS9(9,10,11,20,21,22,25,26,27), }; -static int ari64_init() +static void ari64_reset() { - extern void (*psxCP2[64])(); - extern void psxNULL(); - size_t i; + ari64_thread_sync(); + new_dyna_pcsx_mem_reset(); + new_dynarec_invalidate_all_pages(); + new_dyna_pcsx_mem_load_state(); +} - new_dynarec_init(); - new_dyna_pcsx_mem_init(); +// execute until predefined leave points +// (HLE softcall exit and BIOS fastboot end) +static void ari64_execute_until(psxRegisters *regs) +{ + void *drc_local = (char *)regs - LO_psxRegs; - for (i = 0; i < ARRAY_SIZE(gte_handlers); i++) - if (psxCP2[i] != psxNULL) - gte_handlers[i] = psxCP2[i]; + assert(drc_local == dynarec_local); + evprintf("+exec %08x, %u->%u (%d)\n", regs->pc, regs->cycle, + regs->next_interupt, regs->next_interupt - regs->cycle); -#if !defined(DRC_DBG) -#ifdef __arm__ - gte_handlers[0x06] = gteNCLIP_arm; - gte_handlers_nf[0x01] = gteRTPS_nf_arm; - gte_handlers_nf[0x30] = gteRTPT_nf_arm; -#endif -#ifdef __ARM_NEON__ - // compiler's _nf version is still a lot slower than neon - // _nf_arm RTPS is roughly the same, RTPT slower - gte_handlers[0x01] = gte_handlers_nf[0x01] = gteRTPS_neon; - gte_handlers[0x30] = gte_handlers_nf[0x30] = gteRTPT_neon; -#endif + new_dyna_start(drc_local); + + evprintf("-exec %08x, %u->%u (%d) stop %d \n", regs->pc, regs->cycle, + regs->next_interupt, regs->next_interupt - regs->cycle, regs->stop); +} + +static void ari64_execute(struct psxRegisters *regs) +{ + while (!regs->stop) { + schedule_timeslice(regs); + ari64_execute_until(regs); + evprintf("drc left @%08x\n", regs->pc); + } +} + +static void ari64_execute_block(struct psxRegisters *regs, enum blockExecCaller caller) +{ + if (caller == EXEC_CALLER_BOOT) + regs->stop++; + + regs->next_interupt = regs->cycle + 1; + ari64_execute_until(regs); + + if (caller == EXEC_CALLER_BOOT) + regs->stop--; +} + +static void ari64_clear(u32 addr, u32 size) +{ + u32 end = addr + size * 4; /* PCSX uses DMA units (words) */ + + evprintf("ari64_clear %08x %04x\n", addr, size * 4); + + if (!new_dynarec_quick_check_range(addr, end) && + !ari64_thread_check_range(addr, end)) + return; + + ari64_thread_sync(); + new_dynarec_invalidate_range(addr, end); +} + +static void ari64_on_ext_change(int ram_replaced, int other_cpu_emu_exec) +{ + if (ram_replaced) + ari64_reset(); + else if (other_cpu_emu_exec) + new_dyna_pcsx_mem_load_state(); +} + +static void ari64_notify(enum R3000Anote note, void *data) { + switch (note) + { + case R3000ACPU_NOTIFY_CACHE_UNISOLATED: + case R3000ACPU_NOTIFY_CACHE_ISOLATED: + new_dyna_pcsx_mem_isolate(note == R3000ACPU_NOTIFY_CACHE_ISOLATED); + break; + case R3000ACPU_NOTIFY_BEFORE_SAVE: + break; + case R3000ACPU_NOTIFY_AFTER_LOAD: + ari64_on_ext_change(data == NULL, 0); + psxInt.Notify(note, data); + break; + } +} + +static void ari64_apply_config() +{ + int thread_changed; + + ari64_thread_sync(); + intApplyConfig(); + + if (Config.DisableStalls) + ndrc_g.hacks |= NDHACK_NO_STALLS; + else + ndrc_g.hacks &= ~NDHACK_NO_STALLS; + + thread_changed = ((ndrc_g.hacks | ndrc_g.hacks_pergame) ^ ndrc_g.hacks_old) + & (NDHACK_THREAD_FORCE | NDHACK_THREAD_FORCE_ON); + if (Config.cycle_multiplier != ndrc_g.cycle_multiplier_old + || (ndrc_g.hacks | ndrc_g.hacks_pergame) != ndrc_g.hacks_old) + { + new_dynarec_clear_full(); + } + if (thread_changed) + ari64_thread_init(); +} + +#ifdef NDRC_THREAD +static void clear_local_cache(void) +{ +#if defined(__arm__) || defined(__aarch64__) + if (ndrc_g.thread.dirty_start) { + // see "Ensuring the visibility of updates to instructions" + // in v7/v8 reference manuals (DDI0406, DDI0487 etc.) +#if defined(__aarch64__) || defined(HAVE_ARMV8) + // the actual clean/invalidate is broadcast to all cores, + // the manual only prescribes an isb + __asm__ volatile("isb"); +//#elif defined(_3DS) +// ctr_invalidate_icache(); +#else + // while on v6 this is always required, on v7 it depends on + // "Multiprocessing Extensions" being present, but that is difficult + // to detect so do it always for now + new_dyna_clear_cache(ndrc_g.thread.dirty_start, ndrc_g.thread.dirty_end); #endif -#ifdef DRC_DBG - memcpy(gte_handlers_nf, gte_handlers, sizeof(gte_handlers_nf)); + ndrc_g.thread.dirty_start = ndrc_g.thread.dirty_end = 0; + } #endif - psxH_ptr = psxH; - zeromem_ptr = zero_mem; +} - return 0; +static void mixed_execute_block(struct psxRegisters *regs, enum blockExecCaller caller) +{ + psxInt.ExecuteBlock(regs, caller); } -static void ari64_reset() +static void mixed_clear(u32 addr, u32 size) { - printf("ari64_reset\n"); - new_dyna_pcsx_mem_reset(); - invalidate_all_pages(); - new_dyna_restore(); - pending_exception = 1; + ari64_clear(addr, size); + psxInt.Clear(addr, size); } -// execute until predefined leave points -// (HLE softcall exit and BIOS fastboot end) -static void ari64_execute_until() +static void mixed_notify(enum R3000Anote note, void *data) { - schedule_timeslice(); + ari64_notify(note, data); + psxInt.Notify(note, data); +} - evprintf("ari64_execute %08x, %u->%u (%d)\n", psxRegs.pc, - psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle); +static R3000Acpu psxMixedCpu = { + NULL /* Init */, NULL /* Reset */, NULL /* Execute */, + mixed_execute_block, + mixed_clear, + mixed_notify, + NULL /* ApplyConfig */, NULL /* Shutdown */ +}; + +static noinline void ari64_execute_threaded_slow(struct psxRegisters *regs, + enum blockExecCaller block_caller) +{ + if (ndrc_g.thread.busy_addr == ~0u) { + memcpy(ndrc_smrv_regs, regs->GPR.r, sizeof(ndrc_smrv_regs)); + slock_lock(ndrc_g.thread.lock); + ndrc_g.thread.busy_addr = regs->pc; + slock_unlock(ndrc_g.thread.lock); + scond_signal(ndrc_g.thread.cond); + } - new_dyna_start(); + //ari64_notify(R3000ACPU_NOTIFY_BEFORE_SAVE, NULL); + psxInt.Notify(R3000ACPU_NOTIFY_AFTER_LOAD, NULL); + assert(psxCpu == &psxRec); + psxCpu = &psxMixedCpu; + for (;;) + { + mixed_execute_block(regs, block_caller); - evprintf("ari64_execute end %08x, %u->%u (%d)\n", psxRegs.pc, - psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle); + if (ndrc_g.thread.busy_addr == ~0u) + break; + if (block_caller == EXEC_CALLER_HLE) { + if (!psxBiosSoftcallEnded()) + continue; + break; + } + else if (block_caller == EXEC_CALLER_BOOT) { + if (!psxExecuteBiosEnded()) + continue; + break; + } + if (regs->stop) + break; + } + psxCpu = &psxRec; + + psxInt.Notify(R3000ACPU_NOTIFY_BEFORE_SAVE, NULL); + //ari64_notify(R3000ACPU_NOTIFY_AFTER_LOAD, NULL); + ari64_on_ext_change(0, 1); } -static void ari64_execute() +static void ari64_execute_threaded_once(struct psxRegisters *regs, + enum blockExecCaller block_caller) { - while (!stop) { - ari64_execute_until(); - evprintf("drc left @%08x\n", psxRegs.pc); + void *drc_local = (char *)regs - LO_psxRegs; + struct ht_entry *hash_table = + *(void **)((char *)drc_local + LO_hash_table_ptr); + void *target; + + if (likely(ndrc_g.thread.busy_addr == ~0u)) { + target = ndrc_get_addr_ht_param(hash_table, regs->pc, + ndrc_cm_no_compile); + if (target) { + clear_local_cache(); + new_dyna_start_at(drc_local, target); + return; + } } + ari64_execute_threaded_slow(regs, block_caller); } -static void ari64_clear(u32 addr, u32 size) +static void ari64_execute_threaded(struct psxRegisters *regs) { - u32 start, end, main_ram; + schedule_timeslice(regs); + while (!regs->stop) + { + ari64_execute_threaded_once(regs, EXEC_CALLER_OTHER); - size *= 4; /* PCSX uses DMA units */ + if ((s32)(regs->cycle - regs->next_interupt) >= 0) + schedule_timeslice(regs); + } +} - evprintf("ari64_clear %08x %04x\n", addr, size); +static void ari64_execute_threaded_block(struct psxRegisters *regs, + enum blockExecCaller caller) +{ + if (caller == EXEC_CALLER_BOOT) + regs->stop++; - /* check for RAM mirrors */ - main_ram = (addr & 0xffe00000) == 0x80000000; + regs->next_interupt = regs->cycle + 1; - start = addr >> 12; - end = (addr + size) >> 12; + ari64_execute_threaded_once(regs, caller); + if (regs->cpuInRecursion) { + // must sync since we are returning to compiled code + ari64_thread_sync(); + } - for (; start <= end; start++) - if (!main_ram || !invalid_code[start]) - invalidate_block(start); + if (caller == EXEC_CALLER_BOOT) + regs->stop--; } -static void ari64_shutdown() +static void ari64_thread_sync(void) { - new_dynarec_cleanup(); + if (!ndrc_g.thread.lock || ndrc_g.thread.busy_addr == ~0u) + return; + for (;;) { + slock_lock(ndrc_g.thread.lock); + slock_unlock(ndrc_g.thread.lock); + if (ndrc_g.thread.busy_addr == ~0) + break; + retro_sleep(0); + } +} + +static int ari64_thread_check_range(unsigned int start, unsigned int end) +{ + u32 addr = ndrc_g.thread.busy_addr; + if (addr == ~0u) + return 0; + + addr &= 0x1fffffff; + start &= 0x1fffffff; + end &= 0x1fffffff; + if (addr >= end) + return 0; + if (addr + MAXBLOCK * 4 <= start) + return 0; + + //SysPrintf("%x hits %x-%x\n", addr, start, end); + return 1; +} + +static void ari64_compile_thread(void *unused) +{ + struct ht_entry *hash_table = + *(void **)((char *)dynarec_local + LO_hash_table_ptr); + void *target; + u32 addr; + + slock_lock(ndrc_g.thread.lock); + while (!ndrc_g.thread.exit) + { + addr = *(volatile unsigned int *)&ndrc_g.thread.busy_addr; + if (addr == ~0u) + scond_wait(ndrc_g.thread.cond, ndrc_g.thread.lock); + addr = *(volatile unsigned int *)&ndrc_g.thread.busy_addr; + if (addr == ~0u || ndrc_g.thread.exit) + continue; + + target = ndrc_get_addr_ht_param(hash_table, addr, + ndrc_cm_compile_in_thread); + //printf("c %08x -> %p\n", addr, target); + ndrc_g.thread.busy_addr = ~0u; + } + slock_unlock(ndrc_g.thread.lock); + (void)target; +} + +static void ari64_thread_shutdown(void) +{ + psxRec.Execute = ari64_execute; + psxRec.ExecuteBlock = ari64_execute_block; + + if (ndrc_g.thread.lock) + slock_lock(ndrc_g.thread.lock); + ndrc_g.thread.exit = 1; + if (ndrc_g.thread.lock) + slock_unlock(ndrc_g.thread.lock); + if (ndrc_g.thread.cond) + scond_signal(ndrc_g.thread.cond); + if (ndrc_g.thread.handle) { + sthread_join(ndrc_g.thread.handle); + ndrc_g.thread.handle = NULL; + } + if (ndrc_g.thread.cond) { + scond_free(ndrc_g.thread.cond); + ndrc_g.thread.cond = NULL; + } + if (ndrc_g.thread.lock) { + slock_free(ndrc_g.thread.lock); + ndrc_g.thread.lock = NULL; + } + ndrc_g.thread.busy_addr = ~0u; } -extern void intExecute(); -extern void intExecuteT(); -extern void intExecuteBlock(); -extern void intExecuteBlockT(); -#ifndef DRC_DBG -#define intExecuteT intExecute -#define intExecuteBlockT intExecuteBlock +static void ari64_thread_init(void) +{ + int enable; + + if (ndrc_g.hacks_pergame & NDHACK_THREAD_FORCE) + enable = 0; + else if (ndrc_g.hacks & NDHACK_THREAD_FORCE) + enable = ndrc_g.hacks & NDHACK_THREAD_FORCE_ON; + else { + u32 cpu_count = cpu_features_get_core_amount(); + enable = cpu_count > 1; +#ifdef _3DS + // bad for old3ds, reprotedly no improvement for new3ds + enable = 0; +#endif + } + + if (!ndrc_g.thread.handle == !enable) + return; + + ari64_thread_shutdown(); + ndrc_g.thread.exit = 0; + ndrc_g.thread.busy_addr = ~0u; + + if (enable) { + ndrc_g.thread.lock = slock_new(); + ndrc_g.thread.cond = scond_new(); + } + if (ndrc_g.thread.lock && ndrc_g.thread.cond) + ndrc_g.thread.handle = pcsxr_sthread_create(ari64_compile_thread, PCSXRT_DRC); + if (ndrc_g.thread.handle) { + psxRec.Execute = ari64_execute_threaded; + psxRec.ExecuteBlock = ari64_execute_threaded_block; + } + else { + // clean up potential incomplete init + ari64_thread_shutdown(); + } + SysPrintf("compiler thread %sabled\n", ndrc_g.thread.handle ? "en" : "dis"); +} +#else // if !NDRC_THREAD +static void ari64_thread_init(void) {} +static void ari64_thread_shutdown(void) {} +static int ari64_thread_check_range(unsigned int start, unsigned int end) { return 0; } #endif +static int ari64_init() +{ + static u32 scratch_buf[8*8*2] __attribute__((aligned(64))); + size_t i; + + new_dynarec_init(); + new_dyna_pcsx_mem_init(); + + for (i = 0; i < ARRAY_SIZE(gte_handlers); i++) + if (psxCP2[i] != gteNULL) + gte_handlers[i] = psxCP2[i]; + +#if defined(__arm__) && !defined(DRC_DBG) + gte_handlers[0x06] = gteNCLIP_arm; +#ifdef HAVE_ARMV5 + gte_handlers_nf[0x01] = gteRTPS_nf_arm; + gte_handlers_nf[0x30] = gteRTPT_nf_arm; +#endif +#ifdef __ARM_NEON__ + // compiler's _nf version is still a lot slower than neon + // _nf_arm RTPS is roughly the same, RTPT slower + gte_handlers[0x01] = gte_handlers_nf[0x01] = gteRTPS_neon; + gte_handlers[0x30] = gte_handlers_nf[0x30] = gteRTPT_neon; +#endif +#endif +#ifdef DRC_DBG + memcpy(gte_handlers_nf, gte_handlers, sizeof(gte_handlers_nf)); +#endif + psxH_ptr = psxH; + zeromem_ptr = zero_mem; + scratch_buf_ptr = scratch_buf; // for gte_neon.S + + ndrc_g.cycle_multiplier_old = Config.cycle_multiplier; + ndrc_g.hacks_old = ndrc_g.hacks | ndrc_g.hacks_pergame; + ari64_apply_config(); + ari64_thread_init(); + + return 0; +} + +static void ari64_shutdown() +{ + ari64_thread_shutdown(); + new_dynarec_cleanup(); + new_dyna_pcsx_mem_shutdown(); +} + R3000Acpu psxRec = { ari64_init, ari64_reset, -#if defined(__arm__) ari64_execute, - ari64_execute_until, -#else - intExecuteT, - intExecuteBlockT, -#endif + ari64_execute_block, ari64_clear, + ari64_notify, + ari64_apply_config, ari64_shutdown }; -// TODO: rm -#ifndef DRC_DBG -void do_insn_trace() {} -void do_insn_cmp() {} -#endif +#else // if DRC_DISABLE -#if defined(__x86_64__) || defined(__i386__) -unsigned int address; -int pending_exception, stop; -unsigned int next_interupt; -int new_dynarec_did_compile; -int cycle_multiplier; -int new_dynarec_hacks; -void *psxH_ptr; -void *zeromem_ptr; -u8 zero_mem[0x1000]; +struct ndrc_globals ndrc_g; // dummy void new_dynarec_init() {} -void new_dyna_start() {} +void new_dyna_start(void *context) {} void new_dynarec_cleanup() {} void new_dynarec_clear_full() {} -void invalidate_all_pages() {} -void invalidate_block(unsigned int block) {} +void new_dynarec_invalidate_all_pages() {} +void new_dynarec_invalidate_range(unsigned int start, unsigned int end) {} void new_dyna_pcsx_mem_init(void) {} void new_dyna_pcsx_mem_reset(void) {} void new_dyna_pcsx_mem_load_state(void) {} +void new_dyna_pcsx_mem_isolate(int enable) {} +void new_dyna_pcsx_mem_shutdown(void) {} +int new_dynarec_save_blocks(void *save, int size) { return 0; } +void new_dynarec_load_blocks(const void *save, int size) {} + +#endif // DRC_DISABLE + +#ifndef NDRC_THREAD +static void ari64_thread_sync(void) {} #endif #ifdef DRC_DBG #include static FILE *f; -extern u32 last_io_addr; +u32 irq_test_cycle; +u32 handler_cycle; +u32 last_io_addr; -static void dump_mem(const char *fname, void *mem, size_t size) +void dump_mem(const char *fname, void *mem, size_t size) { FILE *f1 = fopen(fname, "wb"); if (f1 == NULL) @@ -424,21 +699,22 @@ static u32 memcheck_read(u32 a) return *(u32 *)(psxM + (a & 0x1ffffc)); } +#if 0 void do_insn_trace(void) { static psxRegisters oldregs; - static u32 old_io_addr = (u32)-1; - static u32 old_io_data = 0xbad0c0de; + static u32 event_cycles_o[PSXINT_COUNT]; u32 *allregs_p = (void *)&psxRegs; u32 *allregs_o = (void *)&oldregs; u32 io_data; int i; u8 byte; -//last_io_addr = 0x5e2c8; + //last_io_addr = 0x5e2c8; if (f == NULL) f = fopen("tracelog", "wb"); + // log reg changes oldregs.code = psxRegs.code; // don't care for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) { if (allregs_p[i] != allregs_o[i]) { @@ -447,19 +723,30 @@ void do_insn_trace(void) allregs_o[i] = allregs_p[i]; } } - if (old_io_addr != last_io_addr) { - byte = 0xfd; - fwrite(&byte, 1, 1, f); - fwrite(&last_io_addr, 1, 4, f); - old_io_addr = last_io_addr; + // log event changes + for (i = 0; i < PSXINT_COUNT; i++) { + if (psxRegs.event_cycles[i] != event_cycles_o[i]) { + byte = 0xf8; + fwrite(&byte, 1, 1, f); + fwrite(&i, 1, 1, f); + fwrite(&psxRegs.event_cycles[i], 1, 4, f); + event_cycles_o[i] = psxRegs.event_cycles[i]; + } } - io_data = memcheck_read(last_io_addr); - if (old_io_data != io_data) { - byte = 0xfe; - fwrite(&byte, 1, 1, f); - fwrite(&io_data, 1, 4, f); - old_io_data = io_data; + #define SAVE_IF_CHANGED(code_, name_) { \ + static u32 old_##name_ = 0xbad0c0de; \ + if (old_##name_ != name_) { \ + byte = code_; \ + fwrite(&byte, 1, 1, f); \ + fwrite(&name_, 1, 4, f); \ + old_##name_ = name_; \ + } \ } + SAVE_IF_CHANGED(0xfb, irq_test_cycle); + SAVE_IF_CHANGED(0xfc, handler_cycle); + SAVE_IF_CHANGED(0xfd, last_io_addr); + io_data = memcheck_read(last_io_addr); + SAVE_IF_CHANGED(0xfe, io_data); byte = 0xff; fwrite(&byte, 1, 1, f); @@ -472,6 +759,7 @@ void do_insn_trace(void) } #endif } +#endif static const char *regnames[offsetof(psxRegisters, intCycle) / 4] = { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", @@ -520,12 +808,18 @@ void breakme() {} void do_insn_cmp(void) { + extern int last_count; static psxRegisters rregs; static u32 mem_addr, mem_val; + static u32 irq_test_cycle_intr; + static u32 handler_cycle_intr; u32 *allregs_p = (void *)&psxRegs; u32 *allregs_e = (void *)&rregs; + u32 badregs_mask = 0; static u32 ppc, failcount; - int i, ret, bad = 0; + static u32 badregs_mask_prev; + int i, ret, bad = 0, fatal = 0, which_event = -1; + u32 ev_cycles = 0; u8 code; if (f == NULL) @@ -538,34 +832,63 @@ void do_insn_cmp(void) break; if (code == 0xff) break; - if (code == 0xfd) { - if ((ret = fread(&mem_addr, 1, 4, f)) <= 0) - break; + switch (code) { + case 0xf8: + which_event = 0; + fread(&which_event, 1, 1, f); + fread(&ev_cycles, 1, 4, f); continue; - } - if (code == 0xfe) { - if ((ret = fread(&mem_val, 1, 4, f)) <= 0) - break; + case 0xfb: + fread(&irq_test_cycle_intr, 1, 4, f); + continue; + case 0xfc: + fread(&handler_cycle_intr, 1, 4, f); + continue; + case 0xfd: + fread(&mem_addr, 1, 4, f); + continue; + case 0xfe: + fread(&mem_val, 1, 4, f); continue; } - if ((ret = fread(&allregs_e[code], 1, 4, f)) <= 0) - break; + assert(code < offsetof(psxRegisters, intCycle) / 4); + fread(&allregs_e[code], 1, 4, f); } if (ret <= 0) { printf("EOF?\n"); - goto end; + exit(1); } psxRegs.code = rregs.code; // don't care - psxRegs.cycle = rregs.cycle; + psxRegs.cycle += last_count; + //psxRegs.cycle = rregs.cycle; // needs reload in _cmp psxRegs.CP0.r[9] = rregs.CP0.r[9]; // Count -//if (psxRegs.cycle == 166172) breakme(); + //if (psxRegs.cycle == 166172) breakme(); + + if (which_event >= 0 && psxRegs.event_cycles[which_event] != ev_cycles) { + printf("bad ev_cycles #%d: %u %u / %u\n", which_event, + psxRegs.event_cycles[which_event], ev_cycles, psxRegs.cycle); + fatal = 1; + } + + if (irq_test_cycle > irq_test_cycle_intr) { + printf("bad irq_test_cycle: %u %u\n", irq_test_cycle, irq_test_cycle_intr); + fatal = 1; + } + + if (handler_cycle != handler_cycle_intr) { + printf("bad handler_cycle: %u %u\n", handler_cycle, handler_cycle_intr); + fatal = 1; + } - if (memcmp(&psxRegs, &rregs, offsetof(psxRegisters, intCycle)) == 0 && - mem_val == memcheck_read(mem_addr) - ) { + if (mem_val != memcheck_read(mem_addr)) { + printf("bad mem @%08x: %08x %08x\n", mem_addr, memcheck_read(mem_addr), mem_val); + fatal = 1; + } + + if (!fatal && !memcmp(&psxRegs, &rregs, offsetof(psxRegisters, intCycle))) { failcount = 0; goto ok; } @@ -574,25 +897,27 @@ void do_insn_cmp(void) if (allregs_p[i] != allregs_e[i]) { miss_log_add(i, allregs_p[i], allregs_e[i], psxRegs.pc, psxRegs.cycle); bad++; + if (i >= 32) + fatal = 1; + else + badregs_mask |= 1u << i; } } - if (mem_val != memcheck_read(mem_addr)) { - printf("bad mem @%08x: %08x %08x\n", mem_addr, memcheck_read(mem_addr), mem_val); - goto end; - } + if (badregs_mask_prev & badregs_mask) + failcount++; + else + failcount = 0; - if (psxRegs.pc == rregs.pc && bad < 6 && failcount < 32) { + if (!fatal && psxRegs.pc == rregs.pc && bad < 6 && failcount < 24) { static int last_mcycle; if (last_mcycle != psxRegs.cycle >> 20) { printf("%u\n", psxRegs.cycle); last_mcycle = psxRegs.cycle >> 20; } - failcount++; goto ok; } -end: for (i = 0; i < miss_log_len; i++, miss_log_i = (miss_log_i + 1) & miss_log_mask) printf("bad %5s: %08x %08x, pc=%08x, cycle %u\n", regnames[miss_log[miss_log_i].reg], miss_log[miss_log_i].val, @@ -601,13 +926,15 @@ end: for (i = 0; i < 8; i++) printf("r%d=%08x r%2d=%08x r%2d=%08x r%2d=%08x\n", i, allregs_p[i], i+8, allregs_p[i+8], i+16, allregs_p[i+16], i+24, allregs_p[i+24]); - printf("PC: %08x/%08x, cycle %u\n", psxRegs.pc, ppc, psxRegs.cycle); - dump_mem("/mnt/ntz/dev/pnd/tmp/psxram.dump", psxM, 0x200000); - dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs.dump", psxH, 0x10000); + printf("PC: %08x/%08x, cycle %u, next %u\n", psxRegs.pc, ppc, + psxRegs.cycle, psxRegs.next_interupt); + //dump_mem("/tmp/psxram.dump", psxM, 0x200000); + //dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs.dump", psxH, 0x10000); exit(1); ok: - psxRegs.cycle = rregs.cycle + 2; // sync timing + //psxRegs.cycle = rregs.cycle + 2; // sync timing ppc = psxRegs.pc; + badregs_mask_prev = badregs_mask; } -#endif +#endif // DRC_DBG