drc: add some hack options
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / emu_if.c
index 4f0ca75..3cd4f8e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * (C) Gražvydas "notaz" Ignotas, 2010
+ * (C) Gražvydas "notaz" Ignotas, 2010-2011
  *
  * This work is licensed under the terms of GNU GPL version 2 or later.
  * See the COPYING file in the top-level directory.
 #include "../cdrom.h"
 #include "../psxdma.h"
 #include "../mdec.h"
+#include "../gte_arm.h"
+#include "../gte_neon.h"
+#define FLAGLESS
+#include "../gte.h"
 
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
 
@@ -60,6 +64,10 @@ static irq_func * const irq_funcs[] = {
        [PSXINT_MDECOUTDMA] = mdec1Interrupt,
        [PSXINT_SPUDMA] = spuInterrupt,
        [PSXINT_MDECINDMA] = mdec0Interrupt,
+       [PSXINT_GPUOTCDMA] = gpuotcInterrupt,
+       [PSXINT_CDRDMA] = cdrDmaInterrupt,
+       [PSXINT_CDRLID] = cdrLidSeekInterrupt,
+       [PSXINT_CDRPLAY] = cdrPlayInterrupt,
 };
 
 /* local dupe of psxBranchTest, using event_cycles */
@@ -94,9 +102,6 @@ static void irq_test(void)
 void gen_interupt()
 {
        evprintf("  +ge %08x, %u->%u\n", psxRegs.pc, psxRegs.cycle, next_interupt);
-#ifdef DRC_DBG
-       psxRegs.cycle += 2;
-#endif
 
        irq_test();
        //psxBranchTest();
@@ -108,19 +113,20 @@ void gen_interupt()
                next_interupt, next_interupt - psxRegs.cycle);
 }
 
-void MTC0_()
-{
-       extern void psxMTC0();
+// from interpreter
+extern void MTC0(int reg, u32 val);
 
-       evprintf("ari64 MTC0 %08x %08x %u\n", psxRegs.code, psxRegs.pc, psxRegs.cycle);
-       psxMTC0();
-       gen_interupt(); /* FIXME: checking pending irqs should be enough */
+void pcsx_mtc0(u32 reg, u32 val)
+{
+       evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
+       MTC0(reg, val);
+       gen_interupt();
 }
 
-void check_interupt()
+void pcsx_mtc0_ds(u32 reg, u32 val)
 {
-       /* FIXME (also asm) */
-       printf("ari64_check_interupt\n");
+       evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
+       MTC0(reg, val);
 }
 
 void new_dyna_save(void)
@@ -131,12 +137,37 @@ void new_dyna_save(void)
 void new_dyna_restore(void)
 {
        int i;
-       for (i = 0; i < PSXINT_NEWDRC_CHECK; i++)
+       for (i = 0; i < PSXINT_COUNT; i++)
                event_cycles[i] = psxRegs.intCycle[i].sCycle + psxRegs.intCycle[i].cycle;
+
+       new_dyna_pcsx_mem_load_state();
 }
 
+/* GTE stuff */
 void *gte_handlers[64];
 
+void *gte_handlers_nf[64] = {
+       NULL      , gteRTPS_nf , NULL       , NULL      , NULL     , NULL       , gteNCLIP_nf, NULL      , // 00
+       NULL      , NULL       , NULL       , NULL      , gteOP_nf , NULL       , NULL       , NULL      , // 08
+       gteDPCS_nf, gteINTPL_nf, gteMVMVA_nf, gteNCDS_nf, gteCDP_nf, NULL       , gteNCDT_nf , NULL      , // 10
+       NULL      , NULL       , NULL       , gteNCCS_nf, gteCC_nf , NULL       , gteNCS_nf  , NULL      , // 18
+       gteNCT_nf , NULL       , NULL       , NULL      , NULL     , NULL       , NULL       , NULL      , // 20
+       gteSQR_nf , gteDCPL_nf , gteDPCT_nf , NULL      , NULL     , gteAVSZ3_nf, gteAVSZ4_nf, NULL      , // 28 
+       gteRTPT_nf, NULL       , NULL       , NULL      , NULL     , NULL       , NULL       , NULL      , // 30
+       NULL      , NULL       , NULL       , NULL      , NULL     , gteGPF_nf  , gteGPL_nf  , gteNCCT_nf, // 38
+};
+
+const char *gte_regnames[64] = {
+       NULL  , "RTPS" , NULL   , NULL  , NULL , NULL   , "NCLIP", NULL  , // 00
+       NULL  , NULL   , NULL   , NULL  , "OP" , NULL   , NULL   , NULL  , // 08
+       "DPCS", "INTPL", "MVMVA", "NCDS", "CDP", NULL   , "NCDT" , NULL  , // 10
+       NULL  , NULL   , NULL   , "NCCS", "CC" , NULL   , "NCS"  , NULL  , // 18
+       "NCT" , NULL   , NULL   , NULL  , NULL , NULL   , NULL   , NULL  , // 20
+       "SQR" , "DCPL" , "DPCT" , NULL  , NULL , "AVSZ3", "AVSZ4", NULL  , // 28 
+       "RTPT", NULL   , NULL   , NULL  , NULL , NULL   , NULL   , NULL  , // 30
+       NULL  , NULL   , NULL   , NULL  , NULL , "GPF"  , "GPL"  , "NCCT", // 38
+};
+
 /* from gte.txt.. not sure if this is any good. */
 const char gte_cycletab[64] = {
        /*   1   2   3   4   5   6   7   8   9   a   b   c   d   e   f */
@@ -146,11 +177,86 @@ const char gte_cycletab[64] = {
        23,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  5,  5, 39,
 };
 
+/*
+ * Single-bit 64-bit masks used by the gte_reg_reads / gte_reg_writes tables.
+ * GCBIT(x) sets bit 32+x, GDBIT(x) sets bit x.
+ * NOTE(review): presumably "C" = GTE control register and "D" = GTE data
+ * register - confirm against gte.h.
+ * The argument is parenthesized so that expression arguments
+ * (e.g. "a ? 1 : 2" or "n << 1") are not split by operator precedence.
+ */
+#define GCBIT(x) \
+       (1ll << (32 + (x)))
+#define GDBIT(x) \
+       (1ll << (x))
+#define GCBITS3(b0,b1,b2) \
+       (GCBIT(b0) | GCBIT(b1) | GCBIT(b2))
+#define GDBITS2(b0,b1) \
+       (GDBIT(b0) | GDBIT(b1))
+#define GDBITS3(b0,b1,b2) \
+       (GDBITS2(b0,b1) | GDBIT(b2))
+#define GDBITS4(b0,b1,b2,b3) \
+       (GDBITS3(b0,b1,b2) | GDBIT(b3))
+#define GDBITS5(b0,b1,b2,b3,b4) \
+       (GDBITS4(b0,b1,b2,b3) | GDBIT(b4))
+#define GDBITS6(b0,b1,b2,b3,b4,b5) \
+       (GDBITS5(b0,b1,b2,b3,b4) | GDBIT(b5))
+#define GDBITS7(b0,b1,b2,b3,b4,b5,b6) \
+       (GDBITS6(b0,b1,b2,b3,b4,b5) | GDBIT(b6))
+#define GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) \
+       (GDBITS7(b0,b1,b2,b3,b4,b5,b6) | GDBIT(b7))
+#define GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) \
+       (GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) | GDBIT(b8))
+#define GDBITS10(b0,b1,b2,b3,b4,b5,b6,b7,b8,b9) \
+       (GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) | GDBIT(b9))
+
+// per-GTE-op 64-bit mask, indexed by the GTE_* op constants.
+// bits 32..63 come from GCBIT()/literal upper halves, bits 0..31 from GDBIT().
+// NOTE(review): presumably the control (upper) and data (lower) registers
+// each op reads - confirm against the GTE implementation.
+const uint64_t gte_reg_reads[64] = {
+       [GTE_RTPS]  = 0x1f0000ff00000000ll | GDBITS7(0,1,13,14,17,18,19),
+       [GTE_NCLIP] =                        GDBITS3(12,13,14),
+       [GTE_OP]    = GCBITS3(0,2,4)       | GDBITS3(9,10,11),
+       [GTE_DPCS]  = GCBITS3(21,22,23)    | GDBITS4(6,8,21,22),
+       [GTE_INTPL] = GCBITS3(21,22,23)    | GDBITS7(6,8,9,10,11,21,22),
+       [GTE_MVMVA] = 0x00ffffff00000000ll | GDBITS6(0,1,2,3,4,5), // XXX: maybe decode further?
+       [GTE_NCDS]  = 0x00ffff0000000000ll | GDBITS5(0,1,6,21,22),
+       [GTE_CDP]   = 0x00fff00000000000ll | GDBITS7(6,8,9,10,11,21,22),
+       [GTE_NCDT]  = 0x00ffff0000000000ll | GDBITS8(0,1,2,3,4,5,6,8),
+       [GTE_NCCS]  = 0x00ffff0000000000ll | GDBITS6(0,1,6,8,21,22),
+       [GTE_CC]    = 0x001fe00000000000ll | GDBITS6(6,9,10,11,21,22),
+       [GTE_NCS]   = 0x001fff0000000000ll | GDBITS4(0,1,21,22),
+       [GTE_NCT]   = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
+       [GTE_SQR]   =                        GDBITS3(9,10,11),
+       [GTE_DCPL]  = GCBITS3(21,22,23)    | GDBITS7(6,8,9,10,11,21,22),
+       [GTE_DPCT]  = GCBITS3(21,22,23)    | GDBITS4(8,20,21,22),
+       [GTE_AVSZ3] = GCBIT(29)            | GDBITS3(17,18,19),
+       [GTE_AVSZ4] = GCBIT(30)            | GDBITS4(16,17,18,19),
+       [GTE_RTPT]  = 0x1f0000ff00000000ll | GDBITS7(0,1,2,3,4,5,19),
+       [GTE_GPF]   =                        GDBITS7(6,8,9,10,11,21,22),
+       [GTE_GPL]   =                        GDBITS10(6,8,9,10,11,21,22,25,26,27),
+       [GTE_NCCT]  = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
+};
+
+// note: this excludes gteFLAG that is always written to
+const uint64_t gte_reg_writes[64] = {
+       [GTE_RTPS]  = 0x0f0f7f00ll,
+       [GTE_NCLIP] = GDBIT(24),
+       [GTE_OP]    = GDBITS6(9,10,11,25,26,27),
+       [GTE_DPCS]  = GDBITS9(9,10,11,20,21,22,25,26,27),
+       [GTE_INTPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
+       [GTE_MVMVA] = GDBITS6(9,10,11,25,26,27),
+       [GTE_NCDS]  = GDBITS9(9,10,11,20,21,22,25,26,27),
+       [GTE_CDP]   = GDBITS9(9,10,11,20,21,22,25,26,27),
+       [GTE_NCDT]  = GDBITS9(9,10,11,20,21,22,25,26,27),
+       [GTE_NCCS]  = GDBITS9(9,10,11,20,21,22,25,26,27),
+       [GTE_CC]    = GDBITS9(9,10,11,20,21,22,25,26,27),
+       [GTE_NCS]   = GDBITS9(9,10,11,20,21,22,25,26,27),
+       [GTE_NCT]   = GDBITS9(9,10,11,20,21,22,25,26,27),
+       [GTE_SQR]   = GDBITS6(9,10,11,25,26,27),
+       [GTE_DCPL]  = GDBITS9(9,10,11,20,21,22,25,26,27),
+       [GTE_DPCT]  = GDBITS9(9,10,11,20,21,22,25,26,27),
+       [GTE_AVSZ3] = GDBITS2(7,24),
+       [GTE_AVSZ4] = GDBITS2(7,24),
+       [GTE_RTPT]  = 0x0f0f7f00ll,
+       [GTE_GPF]   = GDBITS9(9,10,11,20,21,22,25,26,27),
+       [GTE_GPL]   = GDBITS9(9,10,11,20,21,22,25,26,27),
+       [GTE_NCCT]  = GDBITS9(9,10,11,20,21,22,25,26,27),
+};
+
 static int ari64_init()
 {
        extern void (*psxCP2[64])();
        extern void psxNULL();
-       extern void *psxH_ptr;
        size_t i;
 
        new_dynarec_init();
@@ -160,7 +266,24 @@ static int ari64_init()
                if (psxCP2[i] != psxNULL)
                        gte_handlers[i] = psxCP2[i];
 
+#if !defined(DRC_DBG)
+#ifdef __arm__
+       gte_handlers[0x06] = gteNCLIP_arm;
+       gte_handlers_nf[0x01] = gteRTPS_nf_arm;
+       gte_handlers_nf[0x30] = gteRTPT_nf_arm;
+#endif
+#ifdef __ARM_NEON__
+       // compiler's _nf version is still a lot slower than neon
+       // _nf_arm RTPS is roughly the same, RTPT slower
+       gte_handlers[0x01] = gte_handlers_nf[0x01] = gteRTPS_neon;
+       gte_handlers[0x30] = gte_handlers_nf[0x30] = gteRTPT_neon;
+#endif
+#endif
+#ifdef DRC_DBG
+       memcpy(gte_handlers_nf, gte_handlers, sizeof(gte_handlers_nf));
+#endif
        psxH_ptr = psxH;
+       zeromem_ptr = zero_mem;
 
        return 0;
 }
@@ -170,10 +293,13 @@ static void ari64_reset()
        printf("ari64_reset\n");
        new_dyna_pcsx_mem_reset();
        invalidate_all_pages();
+       new_dyna_restore();
        pending_exception = 1;
 }
 
-static void ari64_execute()
+// execute until predefined leave points
+// (HLE softcall exit and BIOS fastboot end)
+static void ari64_execute_until()
 {
        schedule_timeslice();
 
@@ -186,23 +312,30 @@ static void ari64_execute()
                psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle);
 }
 
+// calls ari64_execute_until() over and over until 'stop' is set,
+// logging the pc each time it returns
+static void ari64_execute()
+{
+       for (;;) {
+               if (stop)
+                       break;
+
+               ari64_execute_until();
+               evprintf("drc left @%08x\n", psxRegs.pc);
+       }
+}
+
 static void ari64_clear(u32 addr, u32 size)
 {
-       u32 start, end;
+       u32 start, end, main_ram;
+
+       size *= 4; /* PCSX uses DMA units */
 
        evprintf("ari64_clear %08x %04x\n", addr, size);
 
        /* check for RAM mirrors */
-       if ((addr & ~0xe0600000) < 0x200000) {
-               addr &= ~0xe0600000;
-               addr |=  0x80000000;
-       }
+       main_ram = (addr & 0xffe00000) == 0x80000000;
 
        start = addr >> 12;
        end = (addr + size) >> 12;
 
        for (; start <= end; start++)
-               if (!invalid_code[start])
+               if (!main_ram || !invalid_code[start])
                        invalidate_block(start);
 }
 
@@ -223,9 +356,9 @@ extern void intExecuteBlockT();
 R3000Acpu psxRec = {
        ari64_init,
        ari64_reset,
-#if 1
-       ari64_execute,
+#if defined(__arm__)
        ari64_execute,
+       ari64_execute_until,
 #else
        intExecuteT,
        intExecuteBlockT,
@@ -241,19 +374,24 @@ void do_insn_cmp() {}
 #endif
 
 #if defined(__x86_64__) || defined(__i386__)
-unsigned int address, readmem_word, word;
-unsigned short hword;
-unsigned char byte;
+unsigned int address;
 int pending_exception, stop;
 unsigned int next_interupt;
+int new_dynarec_did_compile;
+int cycle_multiplier;
+int new_dynarec_hacks;
 void *psxH_ptr;
+void *zeromem_ptr;
+u8 zero_mem[0x1000];
 void new_dynarec_init() {}
 void new_dyna_start() {}
 void new_dynarec_cleanup() {}
+void new_dynarec_clear_full() {}
 void invalidate_all_pages() {}
 void invalidate_block(unsigned int block) {}
 void new_dyna_pcsx_mem_init(void) {}
 void new_dyna_pcsx_mem_reset(void) {}
+void new_dyna_pcsx_mem_load_state(void) {}
 #endif
 
 #ifdef DRC_DBG
@@ -271,6 +409,21 @@ static void dump_mem(const char *fname, void *mem, size_t size)
        fclose(f1);
 }
 
+// read the 32-bit word at address 'a' from the buffer selected by its
+// upper 16 bits; the low two address bits are masked off
+static u32 memcheck_read(u32 a)
+{
+       switch (a >> 16) {
+       case 0x1f80:
+               // scratchpad/IO
+               return *(u32 *)(psxH + (a & 0xfffc));
+       case 0x1f00:
+               // parallel
+               return *(u32 *)(psxP + (a & 0xfffc));
+       default:
+               // everything else is treated as RAM, no range check is done
+               return *(u32 *)(psxM + (a & 0x1ffffc));
+       }
+}
+
 void do_insn_trace(void)
 {
        static psxRegisters oldregs;
@@ -278,7 +431,7 @@ void do_insn_trace(void)
        static u32 old_io_data = 0xbad0c0de;
        u32 *allregs_p = (void *)&psxRegs;
        u32 *allregs_o = (void *)&oldregs;
-       u32 *io_data;
+       u32 io_data;
        int i;
        u8 byte;
 
@@ -300,12 +453,12 @@ void do_insn_trace(void)
                fwrite(&last_io_addr, 1, 4, f);
                old_io_addr = last_io_addr;
        }
-       io_data = (void *)(psxM + (last_io_addr&0x1ffffc));
-       if (old_io_data != *io_data) {
+       io_data = memcheck_read(last_io_addr);
+       if (old_io_data != io_data) {
                byte = 0xfe;
                fwrite(&byte, 1, 1, f);
-               fwrite(io_data, 1, 4, f);
-               old_io_data = *io_data;
+               fwrite(&io_data, 1, 4, f);
+               old_io_data = io_data;
        }
        byte = 0xff;
        fwrite(&byte, 1, 1, f);
@@ -405,16 +558,13 @@ void do_insn_cmp(void)
        }
 
        psxRegs.code = rregs.code; // don't care
-psxRegs.cycle = rregs.cycle;
-psxRegs.CP0.r[9] = rregs.CP0.r[9]; // Count
+       psxRegs.cycle = rregs.cycle;
+       psxRegs.CP0.r[9] = rregs.CP0.r[9]; // Count
 
 //if (psxRegs.cycle == 166172) breakme();
-//if (psxRegs.cycle > 11296376) printf("pc=%08x %u  %08x\n", psxRegs.pc, psxRegs.cycle, psxRegs.interrupt);
-
-       mem_addr &= 0x1ffffc;
 
        if (memcmp(&psxRegs, &rregs, offsetof(psxRegisters, intCycle)) == 0 &&
-                       mem_val == *(u32 *)(psxM + mem_addr)
+                       mem_val == memcheck_read(mem_addr)
           ) {
                failcount = 0;
                goto ok;
@@ -427,8 +577,8 @@ psxRegs.CP0.r[9] = rregs.CP0.r[9]; // Count
                }
        }
 
-       if (mem_val != *(u32 *)(psxM + mem_addr)) {
-               printf("bad mem @%08x: %08x %08x\n", mem_addr, *(u32 *)(psxM + mem_addr), mem_val);
+       if (mem_val != memcheck_read(mem_addr)) {
+               printf("bad mem @%08x: %08x %08x\n", mem_addr, memcheck_read(mem_addr), mem_val);
                goto end;
        }