spu: add a schedule callback
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / emu_if.c
index fbd4f96..89e2bd6 100644 (file)
 #define evprintf(...)
 
 char invalid_code[0x100000];
+static u32 scratch_buf[8*8*2] __attribute__((aligned(64))); // 128 u32 entries, 64-byte aligned; published through scratch_buf_ptr in ari64_init()
 u32 event_cycles[PSXINT_COUNT];
 
 static void schedule_timeslice(void)
 {
        u32 i, c = psxRegs.cycle;
+       u32 irqs = psxRegs.interrupt; // event bitmask; bit i pairs with event_cycles[i] in the loop below
        s32 min, dif;
 
-       min = psxNextsCounter + psxNextCounter - c;
-       for (i = 0; i < ARRAY_SIZE(event_cycles); i++) {
+       min = PSXCLK; // starting upper bound; kept when no flagged event is due sooner
+       for (i = 0; irqs != 0; i++, irqs >>= 1) { // ends as soon as no set bits remain
+               if (!(irqs & 1)) // bit clear: event slot i is not considered
+                       continue;
                dif = event_cycles[i] - c;
                //evprintf("  ev %d\n", dif);
                if (0 < dif && dif < min)
                        min = dif;
        }
        next_interupt = c + min;
-
-#if 0
-       static u32 cnt, last_cycle;
-       static u64 sum;
-       if (last_cycle) {
-               cnt++;
-               sum += psxRegs.cycle - last_cycle;
-               if ((cnt & 0xff) == 0)
-                       printf("%u\n", (u32)(sum / cnt));
-       }
-       last_cycle = psxRegs.cycle;
-#endif
 }
 
 typedef void (irq_func)();
@@ -68,6 +60,8 @@ static irq_func * const irq_funcs[] = {
        [PSXINT_CDRDMA] = cdrDmaInterrupt,
        [PSXINT_CDRLID] = cdrLidSeekInterrupt,
        [PSXINT_CDRPLAY] = cdrPlayInterrupt,
+       [PSXINT_SPU_UPDATE] = spuUpdate,
+       [PSXINT_RCNT] = psxRcntUpdate,
 };
 
 /* local dupe of psxBranchTest, using event_cycles */
@@ -77,9 +71,6 @@ static void irq_test(void)
        u32 cycle = psxRegs.cycle;
        u32 irq, irq_bits;
 
-       if ((psxRegs.cycle - psxNextsCounter) >= psxNextCounter)
-               psxRcntUpdate();
-
        // irq_funcs() may queue more irqs
        psxRegs.interrupt = 0;
 
@@ -116,31 +107,47 @@ void gen_interupt()
 // from interpreter
 extern void MTC0(int reg, u32 val);
 
-void pcsx_mtc0(u32 reg)
+void pcsx_mtc0(u32 reg, u32 val) // the value to write now arrives as 'val' instead of the readmem_word global
 {
-       evprintf("MTC0 %d #%x @%08x %u\n", reg, readmem_word, psxRegs.pc, psxRegs.cycle);
-       MTC0(reg, readmem_word);
+       evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
+       MTC0(reg, val);
        gen_interupt();
+       if (Cause & Status & 0x0300) // possible sw irq
+               pending_exception = 1; // raised when Cause and Status share bit 8 or 9 after the write
 }
 
-void pcsx_mtc0_ds(u32 reg)
+void pcsx_mtc0_ds(u32 reg, u32 val) // like pcsx_mtc0() minus gen_interupt() and the sw-irq check; "_ds" presumably means delay slot - TODO confirm
 {
-       evprintf("MTC0 %d #%x @%08x %u\n", reg, readmem_word, psxRegs.pc, psxRegs.cycle);
-       MTC0(reg, readmem_word);
+       evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
+       MTC0(reg, val);
 }
 
 void new_dyna_save(void)
 {
+       psxRegs.interrupt &= ~(1 << PSXINT_RCNT); // old savestate compat: clear the RCNT bit (new_dyna_after_save() sets it again)
+
        // psxRegs.intCycle is always maintained, no need to convert
 }
 
+void new_dyna_after_save(void) // sets the PSXINT_RCNT bit that new_dyna_save() clears
+{
+       psxRegs.interrupt |= 1 << PSXINT_RCNT;
+}
+
 void new_dyna_restore(void)
 {
        int i;
        for (i = 0; i < PSXINT_COUNT; i++)
                event_cycles[i] = psxRegs.intCycle[i].sCycle + psxRegs.intCycle[i].cycle;
+
+       event_cycles[PSXINT_RCNT] = psxNextsCounter + psxNextCounter; // overrides the intCycle-derived value set by the loop above
+       psxRegs.interrupt |=  1 << PSXINT_RCNT; // RCNT is always flagged after a restore
+       psxRegs.interrupt &= (1 << PSXINT_COUNT) - 1; // drop any bits at or above PSXINT_COUNT
+
+       new_dyna_pcsx_mem_load_state();
 }
 
+/* GTE stuff */
 void *gte_handlers[64];
 
 void *gte_handlers_nf[64] = {
@@ -154,6 +161,17 @@ void *gte_handlers_nf[64] = {
        NULL      , NULL       , NULL       , NULL      , NULL     , gteGPF_nf  , gteGPL_nf  , gteNCCT_nf, // 38
 };
 
+const char *gte_regnames[64] = { // despite the name, entries are op mnemonics laid out like gte_handlers_nf[]; NULL = unnamed slot
+       NULL  , "RTPS" , NULL   , NULL  , NULL , NULL   , "NCLIP", NULL  , // 00
+       NULL  , NULL   , NULL   , NULL  , "OP" , NULL   , NULL   , NULL  , // 08
+       "DPCS", "INTPL", "MVMVA", "NCDS", "CDP", NULL   , "NCDT" , NULL  , // 10
+       NULL  , NULL   , NULL   , "NCCS", "CC" , NULL   , "NCS"  , NULL  , // 18
+       "NCT" , NULL   , NULL   , NULL  , NULL , NULL   , NULL   , NULL  , // 20
+       "SQR" , "DCPL" , "DPCT" , NULL  , NULL , "AVSZ3", "AVSZ4", NULL  , // 28
+       "RTPT", NULL   , NULL   , NULL  , NULL , NULL   , NULL   , NULL  , // 30
+       NULL  , NULL   , NULL   , NULL  , NULL , "GPF"  , "GPL"  , "NCCT", // 38
+};
+
 /* from gte.txt.. not sure if this is any good. */
 const char gte_cycletab[64] = {
        /*   1   2   3   4   5   6   7   8   9   a   b   c   d   e   f */
@@ -163,10 +181,87 @@ const char gte_cycletab[64] = {
        23,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  5,  5, 39,
 };
 
+#define GCBIT(x) /* mask bit 32+x, upper half; presumably a GTE control reg - TODO confirm */ \
+       (1ll << (32+(x)))
+#define GDBIT(x) /* mask bit x, lower half; presumably a GTE data reg - TODO confirm */ \
+       (1ll << (x))
+#define GCBITS3(b0,b1,b2) /* OR of three GCBIT()s */ \
+       (GCBIT(b0) | GCBIT(b1) | GCBIT(b2))
+#define GDBITS2(b0,b1) /* GDBITSn: OR of n GDBIT()s, each built on GDBITS(n-1) */ \
+       (GDBIT(b0) | GDBIT(b1))
+#define GDBITS3(b0,b1,b2) \
+       (GDBITS2(b0,b1) | GDBIT(b2))
+#define GDBITS4(b0,b1,b2,b3) \
+       (GDBITS3(b0,b1,b2) | GDBIT(b3))
+#define GDBITS5(b0,b1,b2,b3,b4) \
+       (GDBITS4(b0,b1,b2,b3) | GDBIT(b4))
+#define GDBITS6(b0,b1,b2,b3,b4,b5) \
+       (GDBITS5(b0,b1,b2,b3,b4) | GDBIT(b5))
+#define GDBITS7(b0,b1,b2,b3,b4,b5,b6) \
+       (GDBITS6(b0,b1,b2,b3,b4,b5) | GDBIT(b6))
+#define GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) \
+       (GDBITS7(b0,b1,b2,b3,b4,b5,b6) | GDBIT(b7))
+#define GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) \
+       (GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) | GDBIT(b8))
+#define GDBITS10(b0,b1,b2,b3,b4,b5,b6,b7,b8,b9) \
+       (GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) | GDBIT(b9))
+
+const uint64_t gte_reg_reads[64] = { // per-op mask: GCBIT()s in bits 32..63, GDBIT()s in bits 0..31; ops not listed stay 0
+       [GTE_RTPS]  = 0x1f0000ff00000000ll | GDBITS7(0,1,13,14,17,18,19),
+       [GTE_NCLIP] =                        GDBITS3(12,13,14),
+       [GTE_OP]    = GCBITS3(0,2,4)       | GDBITS3(9,10,11),
+       [GTE_DPCS]  = GCBITS3(21,22,23)    | GDBITS4(6,8,21,22),
+       [GTE_INTPL] = GCBITS3(21,22,23)    | GDBITS7(6,8,9,10,11,21,22),
+       [GTE_MVMVA] = 0x00ffffff00000000ll | GDBITS9(0,1,2,3,4,5,9,10,11), // XXX: maybe decode further?
+       [GTE_NCDS]  = 0x00ffff0000000000ll | GDBITS6(0,1,6,8,21,22),
+       [GTE_CDP]   = 0x00ffe00000000000ll | GDBITS7(6,8,9,10,11,21,22),
+       [GTE_NCDT]  = 0x00ffff0000000000ll | GDBITS8(0,1,2,3,4,5,6,8),
+       [GTE_NCCS]  = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22),
+       [GTE_CC]    = 0x001fe00000000000ll | GDBITS6(6,9,10,11,21,22),
+       [GTE_NCS]   = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22),
+       [GTE_NCT]   = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
+       [GTE_SQR]   =                        GDBITS3(9,10,11),
+       [GTE_DCPL]  = GCBITS3(21,22,23)    | GDBITS7(6,8,9,10,11,21,22),
+       [GTE_DPCT]  = GCBITS3(21,22,23)    | GDBITS4(8,20,21,22),
+       [GTE_AVSZ3] = GCBIT(29)            | GDBITS3(17,18,19),
+       [GTE_AVSZ4] = GCBIT(30)            | GDBITS4(16,17,18,19),
+       [GTE_RTPT]  = 0x1f0000ff00000000ll | GDBITS7(0,1,2,3,4,5,19),
+       [GTE_GPF]   =                        GDBITS7(6,8,9,10,11,21,22),
+       [GTE_GPL]   =                        GDBITS10(6,8,9,10,11,21,22,25,26,27),
+       [GTE_NCCT]  = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6),
+};
+
+// note: this excludes gteFLAG that is always written to
+const uint64_t gte_reg_writes[64] = { // every entry here only uses bits 0..31 (the GDBIT range); ops not listed stay 0
+       [GTE_RTPS]  = 0x0f0f7f00ll,
+       [GTE_NCLIP] = GDBIT(24),
+       [GTE_OP]    = GDBITS6(9,10,11,25,26,27),
+       [GTE_DPCS]  = GDBITS9(9,10,11,20,21,22,25,26,27),
+       [GTE_INTPL] = GDBITS9(9,10,11,20,21,22,25,26,27),
+       [GTE_MVMVA] = GDBITS6(9,10,11,25,26,27),
+       [GTE_NCDS]  = GDBITS9(9,10,11,20,21,22,25,26,27),
+       [GTE_CDP]   = GDBITS9(9,10,11,20,21,22,25,26,27),
+       [GTE_NCDT]  = GDBITS9(9,10,11,20,21,22,25,26,27),
+       [GTE_NCCS]  = GDBITS9(9,10,11,20,21,22,25,26,27),
+       [GTE_CC]    = GDBITS9(9,10,11,20,21,22,25,26,27),
+       [GTE_NCS]   = GDBITS9(9,10,11,20,21,22,25,26,27),
+       [GTE_NCT]   = GDBITS9(9,10,11,20,21,22,25,26,27),
+       [GTE_SQR]   = GDBITS6(9,10,11,25,26,27),
+       [GTE_DCPL]  = GDBITS9(9,10,11,20,21,22,25,26,27),
+       [GTE_DPCT]  = GDBITS9(9,10,11,20,21,22,25,26,27),
+       [GTE_AVSZ3] = GDBITS2(7,24),
+       [GTE_AVSZ4] = GDBITS2(7,24),
+       [GTE_RTPT]  = 0x0f0f7f00ll,
+       [GTE_GPF]   = GDBITS9(9,10,11,20,21,22,25,26,27),
+       [GTE_GPL]   = GDBITS9(9,10,11,20,21,22,25,26,27),
+       [GTE_NCCT]  = GDBITS9(9,10,11,20,21,22,25,26,27),
+};
+
 static int ari64_init()
 {
        extern void (*psxCP2[64])();
        extern void psxNULL();
+       extern u_char *out;
        size_t i;
 
        new_dynarec_init();
@@ -176,18 +271,29 @@ static int ari64_init()
                if (psxCP2[i] != psxNULL)
                        gte_handlers[i] = psxCP2[i];
 
-#if !defined(DRC_DBG) && !defined(PCNT)
-#ifdef __arm__
+#if defined(__arm__) && !defined(DRC_DBG)
        gte_handlers[0x06] = gteNCLIP_arm;
+#ifdef HAVE_ARMV5
+       gte_handlers_nf[0x01] = gteRTPS_nf_arm;
+       gte_handlers_nf[0x30] = gteRTPT_nf_arm;
 #endif
 #ifdef __ARM_NEON__
-       // compiler's _nf version is still a lot slower then neon
+       // compiler's _nf version is still a lot slower than neon
+       // _nf_arm RTPS is roughly the same, RTPT slower
        gte_handlers[0x01] = gte_handlers_nf[0x01] = gteRTPS_neon;
        gte_handlers[0x30] = gte_handlers_nf[0x30] = gteRTPT_neon;
-       gte_handlers[0x12] = gte_handlers_nf[0x12] = gteMVMVA_neon;
 #endif
+#endif
+#ifdef DRC_DBG
+       memcpy(gte_handlers_nf, gte_handlers, sizeof(gte_handlers_nf));
 #endif
        psxH_ptr = psxH;
+       zeromem_ptr = zero_mem;
+       scratch_buf_ptr = scratch_buf;
+
+       SysPrintf("Mapped (RAM/scrp/ROM/LUTs/TC):\n");
+       SysPrintf("%08x/%08x/%08x/%08x/%08x\n",
+               psxM, psxH, psxR, mem_rtab, out);
 
        return 0;
 }
@@ -246,6 +352,7 @@ static void ari64_clear(u32 addr, u32 size)
 static void ari64_shutdown()
 {
        new_dynarec_cleanup();
+       new_dyna_pcsx_mem_shutdown(); // NOTE(review): presumably tears down what new_dyna_pcsx_mem_init() set up - confirm
 }
 
 extern void intExecute();
@@ -260,7 +367,7 @@ extern void intExecuteBlockT();
 R3000Acpu psxRec = {
        ari64_init,
        ari64_reset,
-#if defined(__arm__)
+#ifndef DRC_DISABLE
        ari64_execute,
        ari64_execute_until,
 #else
@@ -277,14 +384,20 @@ void do_insn_trace() {}
 void do_insn_cmp() {}
 #endif
 
-#if defined(__x86_64__) || defined(__i386__)
-unsigned int address, readmem_word, word;
-unsigned short hword;
-unsigned char byte;
+#ifdef DRC_DISABLE
+unsigned int address;
 int pending_exception, stop;
 unsigned int next_interupt;
+int new_dynarec_did_compile;
+int cycle_multiplier;
+int new_dynarec_hacks;
 void *psxH_ptr;
-void new_dynarec_init() {}
+void *zeromem_ptr;
+u8 zero_mem[0x1000];
+u_char *out;
+void *mem_rtab;
+void *scratch_buf_ptr;
+void new_dynarec_init() { (void)ari64_execute; } // DRC_DISABLE stub; the (void) reference presumably silences an unused-function warning - TODO confirm
 void new_dyna_start() {}
 void new_dynarec_cleanup() {}
 void new_dynarec_clear_full() {}
@@ -292,6 +405,8 @@ void invalidate_all_pages() {}
 void invalidate_block(unsigned int block) {}
 void new_dyna_pcsx_mem_init(void) {}
 void new_dyna_pcsx_mem_reset(void) {}
+void new_dyna_pcsx_mem_load_state(void) {}
+void new_dyna_pcsx_mem_shutdown(void) {}
 #endif
 
 #ifdef DRC_DBG
@@ -329,16 +444,18 @@ void do_insn_trace(void)
        static psxRegisters oldregs;
        static u32 old_io_addr = (u32)-1;
        static u32 old_io_data = 0xbad0c0de;
+       static u32 event_cycles_o[PSXINT_COUNT];
        u32 *allregs_p = (void *)&psxRegs;
        u32 *allregs_o = (void *)&oldregs;
        u32 io_data;
        int i;
        u8 byte;
 
-//last_io_addr = 0x5e2c8;
+       //last_io_addr = 0x5e2c8;
        if (f == NULL)
                f = fopen("tracelog", "wb");
 
+       // log reg changes
        oldregs.code = psxRegs.code; // don't care
        for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
                if (allregs_p[i] != allregs_o[i]) {
@@ -347,6 +464,17 @@ void do_insn_trace(void)
                        allregs_o[i] = allregs_p[i];
                }
        }
+       // log event changes
+       for (i = 0; i < PSXINT_COUNT; i++) {
+               if (event_cycles[i] != event_cycles_o[i]) {
+                       byte = 0xfc;
+                       fwrite(&byte, 1, 1, f);
+                       fwrite(&i, 1, 1, f);
+                       fwrite(&event_cycles[i], 1, 4, f);
+                       event_cycles_o[i] = event_cycles[i];
+               }
+       }
+       // log last io
        if (old_io_addr != last_io_addr) {
                byte = 0xfd;
                fwrite(&byte, 1, 1, f);
@@ -425,7 +553,8 @@ void do_insn_cmp(void)
        u32 *allregs_p = (void *)&psxRegs;
        u32 *allregs_e = (void *)&rregs;
        static u32 ppc, failcount;
-       int i, ret, bad = 0;
+       int i, ret, bad = 0, which_event = -1;
+       u32 ev_cycles = 0;
        u8 code;
 
        if (f == NULL)
@@ -438,18 +567,20 @@ void do_insn_cmp(void)
                        break;
                if (code == 0xff)
                        break;
-               if (code == 0xfd) {
-                       if ((ret = fread(&mem_addr, 1, 4, f)) <= 0)
-                               break;
+               switch (code) {
+               case 0xfc:
+                       which_event = 0;
+                       fread(&which_event, 1, 1, f);
+                       fread(&ev_cycles, 1, 4, f);
                        continue;
-               }
-               if (code == 0xfe) {
-                       if ((ret = fread(&mem_val, 1, 4, f)) <= 0)
-                               break;
+               case 0xfd:
+                       fread(&mem_addr, 1, 4, f);
+                       continue;
+               case 0xfe:
+                       fread(&mem_val, 1, 4, f);
                        continue;
                }
-               if ((ret = fread(&allregs_e[code], 1, 4, f)) <= 0)
-                       break;
+               fread(&allregs_e[code], 1, 4, f);
        }
 
        if (ret <= 0) {
@@ -461,7 +592,7 @@ void do_insn_cmp(void)
        psxRegs.cycle = rregs.cycle;
        psxRegs.CP0.r[9] = rregs.CP0.r[9]; // Count
 
-//if (psxRegs.cycle == 166172) breakme();
+       //if (psxRegs.cycle == 166172) breakme();
 
        if (memcmp(&psxRegs, &rregs, offsetof(psxRegisters, intCycle)) == 0 &&
                        mem_val == memcheck_read(mem_addr)
@@ -482,6 +613,11 @@ void do_insn_cmp(void)
                goto end;
        }
 
+       if (which_event >= 0 && event_cycles[which_event] != ev_cycles) {
+               printf("bad ev_cycles #%d: %08x %08x\n", which_event, event_cycles[which_event], ev_cycles);
+               goto end;
+       }
+
        if (psxRegs.pc == rregs.pc && bad < 6 && failcount < 32) {
                static int last_mcycle;
                if (last_mcycle != psxRegs.cycle >> 20) {