X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=blobdiff_plain;f=libpcsxcore%2Fnew_dynarec%2Femu_if.c;h=852d881e572296d9c5c6edfd3674da617e2ecd57;hp=4f0ca758cedbd793b39b6ad5a98be210fa69aad2;hb=5b8c000f969c365d48418781d8f88f9c58d65611;hpb=528ad661dd07067cbcd6c1beebad63fc36795759 diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index 4f0ca758..852d881e 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -1,5 +1,5 @@ /* - * (C) Gražvydas "notaz" Ignotas, 2010 + * (C) Gražvydas "notaz" Ignotas, 2010-2011 * * This work is licensed under the terms of GNU GPL version 2 or later. * See the COPYING file in the top-level directory. @@ -14,6 +14,10 @@ #include "../cdrom.h" #include "../psxdma.h" #include "../mdec.h" +#include "../gte_arm.h" +#include "../gte_neon.h" +#define FLAGLESS +#include "../gte.h" #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) @@ -26,28 +30,19 @@ u32 event_cycles[PSXINT_COUNT]; static void schedule_timeslice(void) { u32 i, c = psxRegs.cycle; + u32 irqs = psxRegs.interrupt; s32 min, dif; - min = psxNextsCounter + psxNextCounter - c; - for (i = 0; i < ARRAY_SIZE(event_cycles); i++) { + min = PSXCLK; + for (i = 0; irqs != 0; i++, irqs >>= 1) { + if (!(irqs & 1)) + continue; dif = event_cycles[i] - c; //evprintf(" ev %d\n", dif); if (0 < dif && dif < min) min = dif; } next_interupt = c + min; - -#if 0 - static u32 cnt, last_cycle; - static u64 sum; - if (last_cycle) { - cnt++; - sum += psxRegs.cycle - last_cycle; - if ((cnt & 0xff) == 0) - printf("%u\n", (u32)(sum / cnt)); - } - last_cycle = psxRegs.cycle; -#endif } typedef void (irq_func)(); @@ -60,6 +55,11 @@ static irq_func * const irq_funcs[] = { [PSXINT_MDECOUTDMA] = mdec1Interrupt, [PSXINT_SPUDMA] = spuInterrupt, [PSXINT_MDECINDMA] = mdec0Interrupt, + [PSXINT_GPUOTCDMA] = gpuotcInterrupt, + [PSXINT_CDRDMA] = cdrDmaInterrupt, + [PSXINT_CDRLID] = cdrLidSeekInterrupt, + [PSXINT_CDRPLAY] = cdrPlayInterrupt, + [PSXINT_RCNT] = psxRcntUpdate, }; /* local dupe of psxBranchTest, using event_cycles */ @@ -69,9 +69,6 @@ static void irq_test(void) u32 cycle = psxRegs.cycle; u32 irq, irq_bits; - if ((psxRegs.cycle - psxNextsCounter) >= psxNextCounter) - psxRcntUpdate(); - // irq_funcs() may queue more irqs psxRegs.interrupt = 0; @@ -94,9 +91,6 @@ static void irq_test(void) void gen_interupt() { evprintf(" +ge %08x, %u->%u\n", psxRegs.pc, psxRegs.cycle, next_interupt); -#ifdef DRC_DBG - psxRegs.cycle += 2; -#endif irq_test(); //psxBranchTest(); @@ -108,35 +102,72 @@ void gen_interupt() next_interupt, next_interupt - psxRegs.cycle); } -void MTC0_() -{ - extern void psxMTC0(); +// from interpreter +extern void MTC0(int reg, u32 val); - evprintf("ari64 MTC0 %08x %08x %u\n", psxRegs.code, psxRegs.pc, psxRegs.cycle); - psxMTC0(); - gen_interupt(); /* FIXME: checking pending irqs should be enough */ +void pcsx_mtc0(u32 reg, u32 val) +{ + evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle); + MTC0(reg, val); + gen_interupt(); } -void check_interupt() +void pcsx_mtc0_ds(u32 reg, u32 val) { - /* FIXME (also asm) */ - printf("ari64_check_interupt\n"); + evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle); + MTC0(reg, val); } void new_dyna_save(void) { + psxRegs.interrupt &= ~(1 << PSXINT_RCNT); // old savestate compat + // psxRegs.intCycle is always maintained, no need to convert } +void new_dyna_after_save(void) +{ + psxRegs.interrupt |= 1 << PSXINT_RCNT; +} + void new_dyna_restore(void) { int i; - for (i = 0; i < PSXINT_NEWDRC_CHECK; i++) + for (i = 0; i < PSXINT_COUNT; i++) event_cycles[i] = psxRegs.intCycle[i].sCycle + psxRegs.intCycle[i].cycle; + + event_cycles[PSXINT_RCNT] = psxNextsCounter + psxNextCounter; + psxRegs.interrupt |= 1 << PSXINT_RCNT; + psxRegs.interrupt &= (1 << PSXINT_COUNT) - 1; + + new_dyna_pcsx_mem_load_state(); } +/* GTE stuff */ void *gte_handlers[64]; +void *gte_handlers_nf[64] = { + NULL , gteRTPS_nf , NULL , NULL , NULL , NULL , gteNCLIP_nf, NULL , // 00 + NULL , NULL , NULL , NULL , gteOP_nf , NULL , NULL , NULL , // 08 + gteDPCS_nf, gteINTPL_nf, gteMVMVA_nf, gteNCDS_nf, gteCDP_nf, NULL , gteNCDT_nf , NULL , // 10 + NULL , NULL , NULL , gteNCCS_nf, gteCC_nf , NULL , gteNCS_nf , NULL , // 18 + gteNCT_nf , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20 + gteSQR_nf , gteDCPL_nf , gteDPCT_nf , NULL , NULL , gteAVSZ3_nf, gteAVSZ4_nf, NULL , // 28 + gteRTPT_nf, NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30 + NULL , NULL , NULL , NULL , NULL , gteGPF_nf , gteGPL_nf , gteNCCT_nf, // 38 +}; + +const char *gte_regnames[64] = { + NULL , "RTPS" , NULL , NULL , NULL , NULL , "NCLIP", NULL , // 00 + NULL , NULL , NULL , NULL , "OP" , NULL , NULL , NULL , // 08 + "DPCS", "INTPL", "MVMVA", "NCDS", "CDP", NULL , "NCDT" , NULL , // 10 + NULL , NULL , NULL , "NCCS", "CC" , NULL , "NCS" , NULL , // 18 + "NCT" , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20 + "SQR" , "DCPL" , "DPCT" , NULL , NULL , "AVSZ3", "AVSZ4", NULL , // 28 + "RTPT", NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30 + NULL , NULL , NULL , NULL , NULL , "GPF" , "GPL" , "NCCT", // 38 +}; + /* from gte.txt.. not sure if this is any good. */ const char gte_cycletab[64] = { /* 1 2 3 4 5 6 7 8 9 a b c d e f */ @@ -146,11 +177,86 @@ const char gte_cycletab[64] = { 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 39, }; +#define GCBIT(x) \ + (1ll << (32+x)) +#define GDBIT(x) \ + (1ll << (x)) +#define GCBITS3(b0,b1,b2) \ + (GCBIT(b0) | GCBIT(b1) | GCBIT(b2)) +#define GDBITS2(b0,b1) \ + (GDBIT(b0) | GDBIT(b1)) +#define GDBITS3(b0,b1,b2) \ + (GDBITS2(b0,b1) | GDBIT(b2)) +#define GDBITS4(b0,b1,b2,b3) \ + (GDBITS3(b0,b1,b2) | GDBIT(b3)) +#define GDBITS5(b0,b1,b2,b3,b4) \ + (GDBITS4(b0,b1,b2,b3) | GDBIT(b4)) +#define GDBITS6(b0,b1,b2,b3,b4,b5) \ + (GDBITS5(b0,b1,b2,b3,b4) | GDBIT(b5)) +#define GDBITS7(b0,b1,b2,b3,b4,b5,b6) \ + (GDBITS6(b0,b1,b2,b3,b4,b5) | GDBIT(b6)) +#define GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) \ + (GDBITS7(b0,b1,b2,b3,b4,b5,b6) | GDBIT(b7)) +#define GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) \ + (GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) | GDBIT(b8)) +#define GDBITS10(b0,b1,b2,b3,b4,b5,b6,b7,b8,b9) \ + (GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) | GDBIT(b9)) + +const uint64_t gte_reg_reads[64] = { + [GTE_RTPS] = 0x1f0000ff00000000ll | GDBITS7(0,1,13,14,17,18,19), + [GTE_NCLIP] = GDBITS3(12,13,14), + [GTE_OP] = GCBITS3(0,2,4) | GDBITS3(9,10,11), + [GTE_DPCS] = GCBITS3(21,22,23) | GDBITS4(6,8,21,22), + [GTE_INTPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22), + [GTE_MVMVA] = 0x00ffffff00000000ll | GDBITS6(0,1,2,3,4,5), // XXX: maybe decode further? + [GTE_NCDS] = 0x00ffff0000000000ll | GDBITS5(0,1,6,21,22), + [GTE_CDP] = 0x00fff00000000000ll | GDBITS7(6,8,9,10,11,21,22), + [GTE_NCDT] = 0x00ffff0000000000ll | GDBITS8(0,1,2,3,4,5,6,8), + [GTE_NCCS] = 0x00ffff0000000000ll | GDBITS6(0,1,6,8,21,22), + [GTE_CC] = 0x001fe00000000000ll | GDBITS6(6,9,10,11,21,22), + [GTE_NCS] = 0x001fff0000000000ll | GDBITS4(0,1,21,22), + [GTE_NCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6), + [GTE_SQR] = GDBITS3(9,10,11), + [GTE_DCPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22), + [GTE_DPCT] = GCBITS3(21,22,23) | GDBITS4(8,20,21,22), + [GTE_AVSZ3] = GCBIT(29) | GDBITS3(17,18,19), + [GTE_AVSZ4] = GCBIT(30) | GDBITS4(16,17,18,19), + [GTE_RTPT] = 0x1f0000ff00000000ll | GDBITS7(0,1,2,3,4,5,19), + [GTE_GPF] = GDBITS7(6,8,9,10,11,21,22), + [GTE_GPL] = GDBITS10(6,8,9,10,11,21,22,25,26,27), + [GTE_NCCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6), +}; + +// note: this excludes gteFLAG that is always written to +const uint64_t gte_reg_writes[64] = { + [GTE_RTPS] = 0x0f0f7f00ll, + [GTE_NCLIP] = GDBIT(24), + [GTE_OP] = GDBITS6(9,10,11,25,26,27), + [GTE_DPCS] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_INTPL] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_MVMVA] = GDBITS6(9,10,11,25,26,27), + [GTE_NCDS] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_CDP] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_NCDT] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_NCCS] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_CC] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_NCS] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_NCT] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_SQR] = GDBITS6(9,10,11,25,26,27), + [GTE_DCPL] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_DPCT] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_AVSZ3] = GDBITS2(7,24), + [GTE_AVSZ4] = GDBITS2(7,24), + [GTE_RTPT] = 0x0f0f7f00ll, + [GTE_GPF] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_GPL] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_NCCT] = GDBITS9(9,10,11,20,21,22,25,26,27), +}; + static int ari64_init() { extern void (*psxCP2[64])(); extern void psxNULL(); - extern void *psxH_ptr; size_t i; new_dynarec_init(); @@ -160,7 +266,24 @@ static int ari64_init() if (psxCP2[i] != psxNULL) gte_handlers[i] = psxCP2[i]; +#if !defined(DRC_DBG) +#ifdef __arm__ + gte_handlers[0x06] = gteNCLIP_arm; + gte_handlers_nf[0x01] = gteRTPS_nf_arm; + gte_handlers_nf[0x30] = gteRTPT_nf_arm; +#endif +#ifdef __ARM_NEON__ + // compiler's _nf version is still a lot slower than neon + // _nf_arm RTPS is roughly the same, RTPT slower + gte_handlers[0x01] = gte_handlers_nf[0x01] = gteRTPS_neon; + gte_handlers[0x30] = gte_handlers_nf[0x30] = gteRTPT_neon; +#endif +#endif +#ifdef DRC_DBG + memcpy(gte_handlers_nf, gte_handlers, sizeof(gte_handlers_nf)); +#endif psxH_ptr = psxH; + zeromem_ptr = zero_mem; return 0; } @@ -170,10 +293,13 @@ static void ari64_reset() printf("ari64_reset\n"); new_dyna_pcsx_mem_reset(); invalidate_all_pages(); + new_dyna_restore(); pending_exception = 1; } -static void ari64_execute() +// execute until predefined leave points +// (HLE softcall exit and BIOS fastboot end) +static void ari64_execute_until() { schedule_timeslice(); @@ -186,23 +312,30 @@ static void ari64_execute() psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle); } +static void ari64_execute() +{ + while (!stop) { + ari64_execute_until(); + evprintf("drc left @%08x\n", psxRegs.pc); + } +} + static void ari64_clear(u32 addr, u32 size) { - u32 start, end; + u32 start, end, main_ram; + + size *= 4; /* PCSX uses DMA units */ evprintf("ari64_clear %08x %04x\n", addr, size); /* check for RAM mirrors */ - if ((addr & ~0xe0600000) < 0x200000) { - addr &= ~0xe0600000; - addr |= 0x80000000; - } + main_ram = (addr & 0xffe00000) == 0x80000000; start = addr >> 12; end = (addr + size) >> 12; for (; start <= end; start++) - if (!invalid_code[start]) + if (!main_ram || !invalid_code[start]) invalidate_block(start); } @@ -223,9 +356,9 @@ extern void intExecuteBlockT(); R3000Acpu psxRec = { ari64_init, ari64_reset, -#if 1 - ari64_execute, +#if defined(__arm__) ari64_execute, + ari64_execute_until, #else intExecuteT, intExecuteBlockT, @@ -241,19 +374,24 @@ void do_insn_cmp() {} #endif #if defined(__x86_64__) || defined(__i386__) -unsigned int address, readmem_word, word; -unsigned short hword; -unsigned char byte; +unsigned int address; int pending_exception, stop; unsigned int next_interupt; +int new_dynarec_did_compile; +int cycle_multiplier; +int new_dynarec_hacks; void *psxH_ptr; +void *zeromem_ptr; +u8 zero_mem[0x1000]; void new_dynarec_init() {} void new_dyna_start() {} void new_dynarec_cleanup() {} +void new_dynarec_clear_full() {} void invalidate_all_pages() {} void invalidate_block(unsigned int block) {} void new_dyna_pcsx_mem_init(void) {} void new_dyna_pcsx_mem_reset(void) {} +void new_dyna_pcsx_mem_load_state(void) {} #endif #ifdef DRC_DBG @@ -271,6 +409,21 @@ static void dump_mem(const char *fname, void *mem, size_t size) fclose(f1); } +static u32 memcheck_read(u32 a) +{ + if ((a >> 16) == 0x1f80) + // scratchpad/IO + return *(u32 *)(psxH + (a & 0xfffc)); + + if ((a >> 16) == 0x1f00) + // parallel + return *(u32 *)(psxP + (a & 0xfffc)); + +// if ((a & ~0xe0600000) < 0x200000) + // RAM + return *(u32 *)(psxM + (a & 0x1ffffc)); +} + void do_insn_trace(void) { static psxRegisters oldregs; @@ -278,7 +431,7 @@ void do_insn_trace(void) static u32 old_io_data = 0xbad0c0de; u32 *allregs_p = (void *)&psxRegs; u32 *allregs_o = (void *)&oldregs; - u32 *io_data; + u32 io_data; int i; u8 byte; @@ -300,12 +453,12 @@ void do_insn_trace(void) fwrite(&last_io_addr, 1, 4, f); old_io_addr = last_io_addr; } - io_data = (void *)(psxM + (last_io_addr&0x1ffffc)); - if (old_io_data != *io_data) { + io_data = memcheck_read(last_io_addr); + if (old_io_data != io_data) { byte = 0xfe; fwrite(&byte, 1, 1, f); - fwrite(io_data, 1, 4, f); - old_io_data = *io_data; + fwrite(&io_data, 1, 4, f); + old_io_data = io_data; } byte = 0xff; fwrite(&byte, 1, 1, f); @@ -405,16 +558,13 @@ void do_insn_cmp(void) } psxRegs.code = rregs.code; // don't care -psxRegs.cycle = rregs.cycle; -psxRegs.CP0.r[9] = rregs.CP0.r[9]; // Count + psxRegs.cycle = rregs.cycle; + psxRegs.CP0.r[9] = rregs.CP0.r[9]; // Count //if (psxRegs.cycle == 166172) breakme(); -//if (psxRegs.cycle > 11296376) printf("pc=%08x %u %08x\n", psxRegs.pc, psxRegs.cycle, psxRegs.interrupt); - - mem_addr &= 0x1ffffc; if (memcmp(&psxRegs, &rregs, offsetof(psxRegisters, intCycle)) == 0 && - mem_val == *(u32 *)(psxM + mem_addr) + mem_val == memcheck_read(mem_addr) ) { failcount = 0; goto ok; @@ -427,8 +577,8 @@ psxRegs.CP0.r[9] = rregs.CP0.r[9]; // Count } } - if (mem_val != *(u32 *)(psxM + mem_addr)) { - printf("bad mem @%08x: %08x %08x\n", mem_addr, *(u32 *)(psxM + mem_addr), mem_val); + if (mem_val != memcheck_read(mem_addr)) { + printf("bad mem @%08x: %08x %08x\n", mem_addr, memcheck_read(mem_addr), mem_val); goto end; }