inline/parametrize rootcounter reads
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / emu_if.c
index cd44726..6957689 100644 (file)
-// pending_exception?
-// swi 0 in do_unalignedwritestub?
+/*
+ * (C) Gražvydas "notaz" Ignotas, 2010-2011
+ *
+ * This work is licensed under the terms of GNU GPL version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
 #include <stdio.h>
 
 #include "emu_if.h"
-#include "../psxmem.h"
+#include "pcsxmem.h"
 #include "../psxhle.h"
+#include "../r3000a.h"
+#include "../cdrom.h"
+#include "../psxdma.h"
+#include "../mdec.h"
+#include "../gte_arm.h"
+#include "../gte_neon.h"
+#define FLAGLESS
+#include "../gte.h"
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
 
-//#define memprintf printf
-#define memprintf(...)
 //#define evprintf printf
 #define evprintf(...)
 
 char invalid_code[0x100000];
+u32 event_cycles[PSXINT_COUNT];
 
-void MTC0_()
+static void schedule_timeslice(void)
 {
-       extern void psxMTC0();
+       u32 i, c = psxRegs.cycle;
+       s32 min, dif;
+
+       min = psxNextsCounter + psxNextCounter - c;
+       for (i = 0; i < ARRAY_SIZE(event_cycles); i++) {
+               dif = event_cycles[i] - c;
+               //evprintf("  ev %d\n", dif);
+               if (0 < dif && dif < min)
+                       min = dif;
+       }
+       next_interupt = c + min;
 
-       memprintf("ari64 MTC0 %08x\n", psxRegs.code);
-       psxMTC0();
-       pending_exception = 1; /* FIXME? */
+#if 0
+       static u32 cnt, last_cycle;
+       static u64 sum;
+       if (last_cycle) {
+               cnt++;
+               sum += psxRegs.cycle - last_cycle;
+               if ((cnt & 0xff) == 0)
+                       printf("%u\n", (u32)(sum / cnt));
+       }
+       last_cycle = psxRegs.cycle;
+#endif
 }
 
-void gen_interupt()
+typedef void (irq_func)();
+
+static irq_func * const irq_funcs[] = {
+       [PSXINT_SIO]    = sioInterrupt,
+       [PSXINT_CDR]    = cdrInterrupt,
+       [PSXINT_CDREAD] = cdrReadInterrupt,
+       [PSXINT_GPUDMA] = gpuInterrupt,
+       [PSXINT_MDECOUTDMA] = mdec1Interrupt,
+       [PSXINT_SPUDMA] = spuInterrupt,
+       [PSXINT_MDECINDMA] = mdec0Interrupt,
+       [PSXINT_GPUOTCDMA] = gpuotcInterrupt,
+       [PSXINT_CDRDMA] = cdrDmaInterrupt,
+       [PSXINT_CDRLID] = cdrLidSeekInterrupt,
+       [PSXINT_CDRPLAY] = cdrPlayInterrupt,
+};
+
+/* local dupe of psxBranchTest, using event_cycles */
+static void irq_test(void)
 {
-       evprintf("ari64_gen_interupt\n");
-       evprintf("  +ge %08x, %d->%d\n", psxRegs.pc, psxRegs.cycle, next_interupt);
-#ifdef DRC_DBG
-       psxRegs.cycle += 2;
-#endif
+       u32 irqs = psxRegs.interrupt;
+       u32 cycle = psxRegs.cycle;
+       u32 irq, irq_bits;
 
-       psxBranchTest();
+       if ((psxRegs.cycle - psxNextsCounter) >= psxNextCounter)
+               psxRcntUpdate();
 
-       next_interupt = psxNextsCounter + psxNextCounter;
-       evprintf("  -ge %08x, %d->%d\n", psxRegs.pc, psxRegs.cycle, next_interupt);
+       // irq_funcs() may queue more irqs
+       psxRegs.interrupt = 0;
 
-       pending_exception = 1; /* FIXME */
-}
+       for (irq = 0, irq_bits = irqs; irq_bits != 0; irq++, irq_bits >>= 1) {
+               if (!(irq_bits & 1))
+                       continue;
+               if ((s32)(cycle - event_cycles[irq]) >= 0) {
+                       irqs &= ~(1 << irq);
+                       irq_funcs[irq]();
+               }
+       }
+       psxRegs.interrupt |= irqs;
 
-void check_interupt()
-{
-       printf("ari64_check_interupt\n");
+       if ((psxHu32(0x1070) & psxHu32(0x1074)) && (Status & 0x401) == 0x401) {
+               psxException(0x400, 0);
+               pending_exception = 1;
+       }
 }
 
-void read_nomem_new()
+void gen_interupt()
 {
-       printf("ari64_read_nomem_new\n");
-}
+       evprintf("  +ge %08x, %u->%u\n", psxRegs.pc, psxRegs.cycle, next_interupt);
 
-static void read_mem8()
-{
-       memprintf("ari64_read_mem8  %08x, PC~=%08x\n", address, psxRegs.pc);
-       readmem_word = psxMemRead8(address) & 0xff;
-}
+       irq_test();
+       //psxBranchTest();
+       //pending_exception = 1;
 
-static void read_mem16()
-{
-       memprintf("ari64_read_mem16 %08x, PC~=%08x\n", address, psxRegs.pc);
-       readmem_word = psxMemRead16(address) & 0xffff;
+       schedule_timeslice();
+
+       evprintf("  -ge %08x, %u->%u (%d)\n", psxRegs.pc, psxRegs.cycle,
+               next_interupt, next_interupt - psxRegs.cycle);
 }
 
-static void read_mem32()
+// from interpreter
+extern void MTC0(int reg, u32 val);
+
+void pcsx_mtc0(u32 reg, u32 val)
 {
-       memprintf("ari64_read_mem32 %08x, PC~=%08x\n", address, psxRegs.pc);
-       readmem_word = psxMemRead32(address);
+       evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
+       MTC0(reg, val);
+       gen_interupt();
 }
 
-static void write_mem8()
+void pcsx_mtc0_ds(u32 reg, u32 val)
 {
-       memprintf("ari64_write_mem8  %08x,       %02x, PC~=%08x\n", address, byte, psxRegs.pc);
-       psxMemWrite8(address, byte);
+       evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
+       MTC0(reg, val);
 }
 
-static void write_mem16()
+void new_dyna_save(void)
 {
-       memprintf("ari64_write_mem16 %08x,     %04x, PC~=%08x\n", address, hword, psxRegs.pc);
-       psxMemWrite16(address, hword);
+       // psxRegs.intCycle is always maintained, no need to convert
 }
 
-static void write_mem32()
+void new_dyna_restore(void)
 {
-       memprintf("ari64_write_mem32 %08x, %08x, PC~=%08x\n", address, word, psxRegs.pc);
-       psxMemWrite32(address, word);
-}
+       int i;
+       for (i = 0; i < PSXINT_COUNT; i++)
+               event_cycles[i] = psxRegs.intCycle[i].sCycle + psxRegs.intCycle[i].cycle;
 
-void (*readmem[0x10000])();
-void (*readmemb[0x10000])();
-void (*readmemh[0x10000])();
-void (*writemem[0x10000])();
-void (*writememb[0x10000])();
-void (*writememh[0x10000])();
+       new_dyna_pcsx_mem_load_state();
+}
 
 void *gte_handlers[64];
 
+void *gte_handlers_nf[64] = {
+       NULL      , gteRTPS_nf , NULL       , NULL      , NULL     , NULL       , gteNCLIP_nf, NULL      , // 00
+       NULL      , NULL       , NULL       , NULL      , gteOP_nf , NULL       , NULL       , NULL      , // 08
+       gteDPCS_nf, gteINTPL_nf, gteMVMVA_nf, gteNCDS_nf, gteCDP_nf, NULL       , gteNCDT_nf , NULL      , // 10
+       NULL      , NULL       , NULL       , gteNCCS_nf, gteCC_nf , NULL       , gteNCS_nf  , NULL      , // 18
+       gteNCT_nf , NULL       , NULL       , NULL      , NULL     , NULL       , NULL       , NULL      , // 20
+       gteSQR_nf , gteDCPL_nf , gteDPCT_nf , NULL      , NULL     , gteAVSZ3_nf, gteAVSZ4_nf, NULL      , // 28 
+       gteRTPT_nf, NULL       , NULL       , NULL      , NULL     , NULL       , NULL       , NULL      , // 30
+       NULL      , NULL       , NULL       , NULL      , NULL     , gteGPF_nf  , gteGPL_nf  , gteNCCT_nf, // 38
+};
+
+const char *gte_regnames[64] = {
+       NULL  , "RTPS" , NULL   , NULL  , NULL , NULL   , "NCLIP", NULL  , // 00
+       NULL  , NULL   , NULL   , NULL  , "OP" , NULL   , NULL   , NULL  , // 08
+       "DPCS", "INTPL", "MVMVA", "NCDS", "CDP", NULL   , "NCDT" , NULL  , // 10
+       NULL  , NULL   , NULL   , "NCCS", "CC" , NULL   , "NCS"  , NULL  , // 18
+       "NCT" , NULL   , NULL   , NULL  , NULL , NULL   , NULL   , NULL  , // 20
+       "SQR" , "DCPL" , "DPCT" , NULL  , NULL , "AVSZ3", "AVSZ4", NULL  , // 28 
+       "RTPT", NULL   , NULL   , NULL  , NULL , NULL   , NULL   , NULL  , // 30
+       NULL  , NULL   , NULL   , NULL  , NULL , "GPF"  , "GPL"  , "NCCT", // 38
+};
+
 /* from gte.txt.. not sure if this is any good. */
 const char gte_cycletab[64] = {
        /*   1   2   3   4   5   6   7   8   9   a   b   c   d   e   f */
@@ -109,42 +183,83 @@ static int ari64_init()
        size_t i;
 
        new_dynarec_init();
+       new_dyna_pcsx_mem_init();
 
-       for (i = 0; i < sizeof(readmem) / sizeof(readmem[0]); i++) {
-               readmemb[i] = read_mem8;
-               readmemh[i] = read_mem16;
-               readmem[i] = read_mem32;
-               writememb[i] = write_mem8;
-               writememh[i] = write_mem16;
-               writemem[i] = write_mem32;
-       }
-
-       for (i = 0; i < sizeof(gte_handlers) / sizeof(gte_handlers[0]); i++)
+       for (i = 0; i < ARRAY_SIZE(gte_handlers); i++)
                if (psxCP2[i] != psxNULL)
                        gte_handlers[i] = psxCP2[i];
 
-       psxHLEt_addr = (void *)psxHLEt; // FIXME: rm
+#if !defined(DRC_DBG) && !defined(PCNT)
+#ifdef __arm__
+       gte_handlers[0x06] = gteNCLIP_arm;
+       gte_handlers_nf[0x01] = gteRTPS_nf_arm;
+       gte_handlers_nf[0x30] = gteRTPT_nf_arm;
+#endif
+#ifdef __ARM_NEON__
+       // compiler's _nf version is still a lot slower than neon
+       // _nf_arm RTPS is roughly the same, RTPT slower
+       gte_handlers[0x01] = gte_handlers_nf[0x01] = gteRTPS_neon;
+       gte_handlers[0x30] = gte_handlers_nf[0x30] = gteRTPT_neon;
+       gte_handlers[0x12] = gte_handlers_nf[0x12] = gteMVMVA_neon;
+#endif
+#endif
+#ifdef DRC_DBG
+       memcpy(gte_handlers_nf, gte_handlers, sizeof(gte_handlers_nf));
+#endif
+       psxH_ptr = psxH;
+
        return 0;
 }
 
 static void ari64_reset()
 {
-       /* hmh */
        printf("ari64_reset\n");
+       new_dyna_pcsx_mem_reset();
+       invalidate_all_pages();
+       new_dyna_restore();
+       pending_exception = 1;
 }
 
-static void ari64_execute()
+// execute until predefined leave points
+// (HLE softcall exit and BIOS fastboot end)
+static void ari64_execute_until()
 {
-       next_interupt = psxNextsCounter + psxNextCounter;
+       schedule_timeslice();
+
+       evprintf("ari64_execute %08x, %u->%u (%d)\n", psxRegs.pc,
+               psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle);
 
-       evprintf("psxNextsCounter %d, psxNextCounter %d\n", psxNextsCounter, psxNextCounter);
-       evprintf("ari64_execute %08x, %d->%d\n", psxRegs.pc, psxRegs.cycle, next_interupt);
        new_dyna_start();
-       evprintf("ari64_execute end %08x, %d->%d\n", psxRegs.pc, psxRegs.cycle, next_interupt);
+
+       evprintf("ari64_execute end %08x, %u->%u (%d)\n", psxRegs.pc,
+               psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle);
+}
+
+static void ari64_execute()
+{
+       while (!stop) {
+               ari64_execute_until();
+               evprintf("drc left @%08x\n", psxRegs.pc);
+       }
 }
 
-static void ari64_clear(u32 Addr, u32 Size)
+static void ari64_clear(u32 addr, u32 size)
 {
+       u32 start, end, main_ram;
+
+       size *= 4; /* PCSX uses DMA units */
+
+       evprintf("ari64_clear %08x %04x\n", addr, size);
+
+       /* check for RAM mirrors */
+       main_ram = (addr & 0xffe00000) == 0x80000000;
+
+       start = addr >> 12;
+       end = (addr + size) >> 12;
+
+       for (; start <= end; start++)
+               if (!main_ram || !invalid_code[start])
+                       invalidate_block(start);
 }
 
 static void ari64_shutdown()
@@ -164,9 +279,9 @@ extern void intExecuteBlockT();
 R3000Acpu psxRec = {
        ari64_init,
        ari64_reset,
-#if 1
-       ari64_execute,
+#if defined(__arm__)
        ari64_execute,
+       ari64_execute_until,
 #else
        intExecuteT,
        intExecuteBlockT,
@@ -185,12 +300,20 @@ void do_insn_cmp() {}
 unsigned int address, readmem_word, word;
 unsigned short hword;
 unsigned char byte;
-int pending_exception;
+int pending_exception, stop;
 unsigned int next_interupt;
-void *psxHLEt_addr;
+int new_dynarec_did_compile;
+int cycle_multiplier;
+void *psxH_ptr;
 void new_dynarec_init() {}
-int  new_dyna_start() {}
+void new_dyna_start() {}
 void new_dynarec_cleanup() {}
+void new_dynarec_clear_full() {}
+void invalidate_all_pages() {}
+void invalidate_block(unsigned int block) {}
+void new_dyna_pcsx_mem_init(void) {}
+void new_dyna_pcsx_mem_reset(void) {}
+void new_dyna_pcsx_mem_load_state(void) {}
 #endif
 
 #ifdef DRC_DBG
@@ -202,10 +325,27 @@ extern u32 last_io_addr;
 static void dump_mem(const char *fname, void *mem, size_t size)
 {
        FILE *f1 = fopen(fname, "wb");
+       if (f1 == NULL)
+               f1 = fopen(strrchr(fname, '/') + 1, "wb");
        fwrite(mem, 1, size, f1);
        fclose(f1);
 }
 
+static u32 memcheck_read(u32 a)
+{
+       if ((a >> 16) == 0x1f80)
+               // scratchpad/IO
+               return *(u32 *)(psxH + (a & 0xfffc));
+
+       if ((a >> 16) == 0x1f00)
+               // parallel
+               return *(u32 *)(psxP + (a & 0xfffc));
+
+//     if ((a & ~0xe0600000) < 0x200000)
+       // RAM
+       return *(u32 *)(psxM + (a & 0x1ffffc));
+}
+
 void do_insn_trace(void)
 {
        static psxRegisters oldregs;
@@ -213,7 +353,7 @@ void do_insn_trace(void)
        static u32 old_io_data = 0xbad0c0de;
        u32 *allregs_p = (void *)&psxRegs;
        u32 *allregs_o = (void *)&oldregs;
-       u32 *io_data;
+       u32 io_data;
        int i;
        u8 byte;
 
@@ -235,12 +375,12 @@ void do_insn_trace(void)
                fwrite(&last_io_addr, 1, 4, f);
                old_io_addr = last_io_addr;
        }
-       io_data = (void *)(psxM + (last_io_addr&0x1ffffc));
-       if (old_io_data != *io_data) {
+       io_data = memcheck_read(last_io_addr);
+       if (old_io_data != io_data) {
                byte = 0xfe;
                fwrite(&byte, 1, 1, f);
-               fwrite(io_data, 1, 4, f);
-               old_io_data = *io_data;
+               fwrite(&io_data, 1, 4, f);
+               old_io_data = io_data;
        }
        byte = 0xff;
        fwrite(&byte, 1, 1, f);
@@ -279,6 +419,25 @@ static const char *regnames[offsetof(psxRegisters, intCycle) / 4] = {
        "PC", "code", "cycle", "interrupt",
 };
 
+static struct {
+       int reg;
+       u32 val, val_expect;
+       u32 pc, cycle;
+} miss_log[64];
+static int miss_log_i;
+#define miss_log_len (sizeof(miss_log)/sizeof(miss_log[0]))
+#define miss_log_mask (miss_log_len-1)
+
+static void miss_log_add(int reg, u32 val, u32 val_expect, u32 pc, u32 cycle)
+{
+       miss_log[miss_log_i].reg = reg;
+       miss_log[miss_log_i].val = val;
+       miss_log[miss_log_i].val_expect = val_expect;
+       miss_log[miss_log_i].pc = pc;
+       miss_log[miss_log_i].cycle = cycle;
+       miss_log_i = (miss_log_i + 1) & miss_log_mask;
+}
+
 void breakme() {}
 
 void do_insn_cmp(void)
@@ -321,16 +480,13 @@ void do_insn_cmp(void)
        }
 
        psxRegs.code = rregs.code; // don't care
-psxRegs.cycle = rregs.cycle;
-psxRegs.CP0.r[9] = rregs.CP0.r[9]; // Count
+       psxRegs.cycle = rregs.cycle;
+       psxRegs.CP0.r[9] = rregs.CP0.r[9]; // Count
 
 //if (psxRegs.cycle == 166172) breakme();
-//if (psxRegs.cycle > 11296376) printf("pc=%08x %u  %08x\n", psxRegs.pc, psxRegs.cycle, psxRegs.interrupt);
-
-       mem_addr &= 0x1ffffc;
 
        if (memcmp(&psxRegs, &rregs, offsetof(psxRegisters, intCycle)) == 0 &&
-                       mem_val == *(u32 *)(psxM + mem_addr)
+                       mem_val == memcheck_read(mem_addr)
           ) {
                failcount = 0;
                goto ok;
@@ -338,24 +494,35 @@ psxRegs.CP0.r[9] = rregs.CP0.r[9]; // Count
 
        for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
                if (allregs_p[i] != allregs_e[i]) {
-                       printf("bad %5s: %08x %08x, pc=%08x, cycle %u\n",
-                               regnames[i], allregs_p[i], allregs_e[i], psxRegs.pc, psxRegs.cycle);
+                       miss_log_add(i, allregs_p[i], allregs_e[i], psxRegs.pc, psxRegs.cycle);
                        bad++;
                }
        }
 
-       if (mem_val != *(u32 *)(psxM + mem_addr)) {
-               printf("bad mem @%08x: %08x %08x\n", mem_addr, *(u32 *)(psxM + mem_addr), mem_val);
+       if (mem_val != memcheck_read(mem_addr)) {
+               printf("bad mem @%08x: %08x %08x\n", mem_addr, memcheck_read(mem_addr), mem_val);
                goto end;
        }
 
        if (psxRegs.pc == rregs.pc && bad < 6 && failcount < 32) {
-               printf("-- %d\n", bad);
+               static int last_mcycle;
+               if (last_mcycle != psxRegs.cycle >> 20) {
+                       printf("%u\n", psxRegs.cycle);
+                       last_mcycle = psxRegs.cycle >> 20;
+               }
                failcount++;
                goto ok;
        }
 
 end:
+       for (i = 0; i < miss_log_len; i++, miss_log_i = (miss_log_i + 1) & miss_log_mask)
+               printf("bad %5s: %08x %08x, pc=%08x, cycle %u\n",
+                       regnames[miss_log[miss_log_i].reg], miss_log[miss_log_i].val,
+                       miss_log[miss_log_i].val_expect, miss_log[miss_log_i].pc, miss_log[miss_log_i].cycle);
+       printf("-- %d\n", bad);
+       for (i = 0; i < 8; i++)
+               printf("r%d=%08x r%2d=%08x r%2d=%08x r%2d=%08x\n", i, allregs_p[i],
+                       i+8, allregs_p[i+8], i+16, allregs_p[i+16], i+24, allregs_p[i+24]);
        printf("PC: %08x/%08x, cycle %u\n", psxRegs.pc, ppc, psxRegs.cycle);
        dump_mem("/mnt/ntz/dev/pnd/tmp/psxram.dump", psxM, 0x200000);
        dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs.dump", psxH, 0x10000);