drc: further hacks, hle handling
author notaz <notasas@gmail.com>
Tue, 30 Nov 2010 21:56:59 +0000 (23:56 +0200)
committer notaz <notasas@gmail.com>
Wed, 1 Dec 2010 23:17:34 +0000 (01:17 +0200)
Makefile
frontend/config.h
frontend/main.c
libpcsxcore/new_dynarec/assem_arm.c
libpcsxcore/new_dynarec/assem_arm.h
libpcsxcore/new_dynarec/emu_if.c
libpcsxcore/new_dynarec/emu_if.h
libpcsxcore/new_dynarec/linkage_arm.s
libpcsxcore/new_dynarec/new_dynarec.c
libpcsxcore/new_dynarec/new_dynarec.h

index 28b3d7c..79a8f2d 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -23,8 +23,10 @@ OBJS += libpcsxcore/cdriso.o libpcsxcore/cdrom.o libpcsxcore/cheat.o libpcsxcore
        libpcsxcore/psxhw.o libpcsxcore/psxinterpreter.o libpcsxcore/psxmem.o libpcsxcore/r3000a.o \
        libpcsxcore/sio.o libpcsxcore/socket.o libpcsxcore/spu.o
 # dynarec
-OBJS += libpcsxcore/new_dynarec/new_dynarec.o libpcsxcore/new_dynarec/linkage_arm.o \
-       libpcsxcore/new_dynarec/emu_if.o
+ifndef NO_NEW_DRC
+OBJS += libpcsxcore/new_dynarec/new_dynarec.o libpcsxcore/new_dynarec/linkage_arm.o
+endif
+OBJS += libpcsxcore/new_dynarec/emu_if.o
 libpcsxcore/new_dynarec/new_dynarec.o: libpcsxcore/new_dynarec/assem_arm.c
 
 # spu
index e8b255d..768460f 100644 (file)
--- a/frontend/config.h
+++ b/frontend/config.h
@@ -4,5 +4,5 @@
 #define PACKAGE_NAME "pcsx"
 #define PACKAGE_VERSION "1.9"
 #define DEF_PLUGIN_DIR "."
-#define EMU_LOG printf
+//#define EMU_LOG printf
 #define USEOSS
index 884b617..fd1c119 100644 (file)
--- a/frontend/main.c
+++ b/frontend/main.c
@@ -87,7 +87,6 @@ int main(int argc, char *argv[])
 
        emuLog = stdout;
        SetIsoFile(NULL);
-       Config.PsxOut = 1;
 
        // read command line options
        for (i = 1; i < argc; i++) {
@@ -353,19 +352,6 @@ void SysPrintf(const char *fmt, ...) {
        vsprintf(msg, fmt, list);
        va_end(list);
 
-       if (Config.PsxOut) {
-               static char linestart = 1;
-               int l = strlen(msg);
-
-               printf(linestart ? " * %s" : "%s", msg);
-
-               if (l > 0 && msg[l - 1] == '\n') {
-                       linestart = 1;
-               } else {
-                       linestart = 0;
-               }
-       }
-
        fprintf(emuLog, "%s", msg);
 }
 
index ea1da1b..4bddd8c 100644 (file)
--- a/libpcsxcore/new_dynarec/assem_arm.c
+++ b/libpcsxcore/new_dynarec/assem_arm.c
@@ -3242,12 +3242,11 @@ void cop0_assemble(int i,struct regstat *i_regs)
     char copr=(source[i]>>11)&0x1f;
     //assert(t>=0); // Why does this happen?  OOT is weird
     if(t>=0) {
-#ifdef MUPEN64 /// FIXME
+#ifdef MUPEN64
       emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
       emit_movimm((source[i]>>11)&0x1f,1);
       emit_writeword(0,(int)&PC);
       emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
-#endif
       if(copr==9) {
         emit_readword((int)&last_count,ECX);
         emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
@@ -3257,6 +3256,9 @@ void cop0_assemble(int i,struct regstat *i_regs)
       }
       emit_call((int)MFC0);
       emit_readword((int)&readmem_dword,t);
+#else
+      emit_readword((int)&reg_cop0+copr*4,t);
+#endif
     }
   }
   else if(opcode2[i]==4) // MTC0
@@ -3272,7 +3274,11 @@ void cop0_assemble(int i,struct regstat *i_regs)
     emit_writeword(0,(int)&PC);
     emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
 #endif
-    if(copr==9||copr==11||copr==12) {
+#ifdef PCSX
+    emit_movimm(source[i],0);
+    emit_writeword(0,(int)&psxRegs.code);
+#endif
+    if(copr==9||copr==11||copr==12||copr==13) {
       emit_readword((int)&last_count,ECX);
       emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
       emit_add(HOST_CCREG,ECX,HOST_CCREG);
@@ -3283,7 +3289,7 @@ void cop0_assemble(int i,struct regstat *i_regs)
     // so needs a special case to handle a pending interrupt.
     // The interrupt must be taken immediately, because a subsequent
     // instruction might disable interrupts again.
-    if(copr==12&&!is_delayslot) {
+    if(copr==12||copr==13) {
       emit_movimm(start+i*4+4,0);
       emit_movimm(0,1);
       emit_writeword(0,(int)&pcaddr);
@@ -3292,7 +3298,7 @@ void cop0_assemble(int i,struct regstat *i_regs)
     //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
     //else
     emit_call((int)MTC0);
-    if(copr==9||copr==11||copr==12) {
+    if(copr==9||copr==11||copr==12||copr==13) {
       emit_readword((int)&Count,HOST_CCREG);
       emit_readword((int)&next_interupt,ECX);
       emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
@@ -3300,14 +3306,14 @@ void cop0_assemble(int i,struct regstat *i_regs)
       emit_writeword(ECX,(int)&last_count);
       emit_storereg(CCREG,HOST_CCREG);
     }
-    if(copr==12) {
+    if(copr==12||copr==13) {
       assert(!is_delayslot);
       emit_readword((int)&pending_exception,14);
     }
     emit_loadreg(rs1[i],s);
     if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
       emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
-    if(copr==12) {
+    if(copr==12||copr==13) {
       emit_test(14,14);
       emit_jne((int)&do_interrupt);
     }
index d97c0f2..f9c9b84 100644 (file)
--- a/libpcsxcore/new_dynarec/assem_arm.h
+++ b/libpcsxcore/new_dynarec/assem_arm.h
@@ -12,6 +12,7 @@
 //#define MUPEN64
 #define FORCE32 1
 #define DISABLE_COP1 1
+#define PCSX 1
 
 #ifdef FORCE32
 #define REG_SHIFT 2
@@ -45,7 +46,7 @@
 
 extern char *invc_ptr;
 
-#define BASE_ADDR 0x2000000 // Code generator target address
+#define BASE_ADDR 0x1000000 // Code generator target address
 #define TARGET_SIZE_2 24 // 2^24 = 16 megabytes
 
 // This is defined in linkage_arm.s, but gcc -O3 likes this better
index cee61cc..d0b991e 100644 (file)
--- a/libpcsxcore/new_dynarec/emu_if.c
+++ b/libpcsxcore/new_dynarec/emu_if.c
@@ -1,37 +1,96 @@
+// cycles after syscall/hle?
+// pending_exception?
 #include <stdio.h>
 
 #include "emu_if.h"
+#include "../psxmem.h"
+#include "../psxhle.h"
+
+//#define memprintf printf
+#define memprintf(...)
+//#define evprintf printf
+#define evprintf(...)
+
+//#define DRC_DBG
 
 char invalid_code[0x100000];
 
-void MFC0(void)
+void MTC0_()
 {
-       printf("MFC0!\n");
+       extern void psxMTC0();
+
+       printf("ari64 MTC0 %08x\n", psxRegs.code);
+       psxMTC0();
+       pending_exception = 1; /* FIXME? */
 }
 
 void gen_interupt()
 {
-       printf("gen_interupt\n");
+       evprintf("ari64_gen_interupt\n");
+       evprintf("  +ge %08x, %d->%d\n", psxRegs.pc, Count, next_interupt);
+#ifdef DRC_DBG
+       psxRegs.cycle += 2;
+#else
+       psxRegs.cycle = Count; // stupid
+#endif
+
+       psxBranchTest();
+
+       if (psxRegs.cycle != Count) {
+               printf("psxRegs.cycle != Count: %d != %d\n", psxRegs.cycle, Count);
+               Count = psxRegs.cycle;
+       }
+
+       next_interupt = Count + psxNextCounter;
+       evprintf("  -ge %08x, %d->%d\n", psxRegs.pc, Count, next_interupt);
+
+       pending_exception = 1; /* FIXME */
 }
 
 void check_interupt()
 {
-       printf("check_interupt\n");
+       printf("ari64_check_interupt\n");
 }
 
 void read_nomem_new()
 {
-       printf("read_nomem_new\n");
+       printf("ari64_read_nomem_new\n");
+}
+
+static void read_mem8()
+{
+       memprintf("ari64_read_mem8  %08x, PC~=%08x\n", address, psxRegs.pc);
+       readmem_word = psxMemRead8(address) & 0xff;
+}
+
+static void read_mem16()
+{
+       memprintf("ari64_read_mem16 %08x, PC~=%08x\n", address, psxRegs.pc);
+       readmem_word = psxMemRead16(address) & 0xffff;
+}
+
+static void read_mem32()
+{
+       memprintf("ari64_read_mem32 %08x, PC~=%08x\n", address, psxRegs.pc);
+       readmem_word = psxMemRead32(address);
+}
+
+static void write_mem8()
+{
+       memprintf("ari64_write_mem8  %08x,       %02x, PC~=%08x\n", address, byte, psxRegs.pc);
+       psxMemWrite8(address, byte);
 }
 
-static void read_mem()
+static void write_mem16()
 {
-       printf("read_mem %08x\n", address);
+       memprintf("ari64_write_mem16 %08x,     %04x, PC~=%08x\n", address, hword, psxRegs.pc);
+       psxMemWrite16(address, hword);
 }
 
-static void write_mem()
+static void write_mem32()
 {
-       printf("write_mem %08x\n", address);
+       memprintf("ari64_write_mem32 %08x, %08x, PC~=%08x\n", address, word, psxRegs.pc);
+       psxMemWrite32(address, word);
 }
 
 void (*readmem[0x10000])();
@@ -48,13 +107,15 @@ static int ari64_init()
        new_dynarec_init();
 
        for (i = 0; i < sizeof(readmem) / sizeof(readmem[0]); i++) {
-               readmem[i] = read_mem;
-               writemem[i] = write_mem;
+               readmemb[i] = read_mem8;
+               readmemh[i] = read_mem16;
+               readmem[i] = read_mem32;
+               writememb[i] = write_mem8;
+               writememh[i] = write_mem16;
+               writemem[i] = write_mem32;
        }
-       memcpy(readmemb, readmem, sizeof(readmem));
-       memcpy(readmemh, readmem, sizeof(readmem));
-       memcpy(writememb, writemem, sizeof(writemem));
-       memcpy(writememh, writemem, sizeof(writemem));
+
+       psxHLEt_addr = (void *)psxHLEt;
 }
 
 static void ari64_reset()
@@ -65,15 +126,18 @@ static void ari64_reset()
 
 static void ari64_execute()
 {
-/*
-       FILE *f = fopen("/mnt/ntz/dev/pnd/tmp/ram.dump", "wb");
-       fwrite((void *)0x80000000, 1, 0x200000, f);
-       fclose(f);
-       exit(1);
-*/
-       printf("psxNextsCounter %d, psxNextCounter %d\n", psxNextsCounter, psxNextCounter);
-       printf("ari64_execute %08x\n", psxRegs.pc);
+       /* TODO: get rid of this cycle counter copying */
+       Count = psxRegs.cycle;
+       next_interupt = Count + psxNextCounter;
+
+       evprintf("psxNextsCounter %d, psxNextCounter %d\n", psxNextsCounter, psxNextCounter);
+       evprintf("ari64_execute %08x, %d->%d\n", psxRegs.pc, Count, next_interupt);
        new_dyna_start(psxRegs.pc);
+       evprintf("ari64_execute end %08x, %d->%d\n", psxRegs.pc, Count, next_interupt);
+
+#ifndef DRC_DBG
+       psxRegs.cycle = Count;
+#endif
 }
 
 static void ari64_clear(u32 Addr, u32 Size)
@@ -85,11 +149,217 @@ static void ari64_shutdown()
        new_dynarec_cleanup();
 }
 
+extern void intExecute();
+extern void intExecuteT();
+extern void intExecuteBlock();
+extern void intExecuteBlockT();
+#ifndef DRC_DBG
+#define intExecuteT intExecute
+#define intExecuteBlockT intExecuteBlock
+#endif
+
 R3000Acpu psxRec = {
        ari64_init,
        ari64_reset,
+#if 1
        ari64_execute,
-// TODO        recExecuteBlock,
+       ari64_execute,
+#else
+       intExecuteT,
+       intExecuteBlockT,
+#endif
        ari64_clear,
        ari64_shutdown
 };
+
+// TODO: rm
+#ifndef DRC_DBG
+void do_insn_trace() {}
+void do_insn_cmp() {}
+#endif
+
+#if defined(__x86_64__) || defined(__i386__)
+unsigned int address, readmem_word, word;
+unsigned short hword;
+unsigned char byte;
+int pending_exception;
+unsigned int next_interupt;
+void *psxHLEt_addr;
+void new_dynarec_init() {}
+int  new_dyna_start() {}
+void new_dynarec_cleanup() {}
+#endif
+
+#ifdef DRC_DBG
+
+#include <stddef.h>
+static FILE *f;
+extern u32 last_io_addr;
+
+static void dump_mem(const char *fname, void *mem, size_t size)
+{
+       FILE *f1 = fopen(fname, "wb");
+       fwrite(mem, 1, size, f1);
+       fclose(f1);
+}
+
+void do_insn_trace(void)
+{
+       static psxRegisters oldregs;
+       static u32 old_io_addr = (u32)-1;
+       static u32 old_io_data = 0xbad0c0de;
+       u32 *allregs_p = (void *)&psxRegs;
+       u32 *allregs_o = (void *)&oldregs;
+       u32 *io_data;
+       int i;
+       u8 byte;
+
+//last_io_addr = 0x5e2c8;
+       if (f == NULL)
+               f = fopen("tracelog", "wb");
+
+       oldregs.code = psxRegs.code; // don't care
+       for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
+               if (allregs_p[i] != allregs_o[i]) {
+                       fwrite(&i, 1, 1, f);
+                       fwrite(&allregs_p[i], 1, 4, f);
+                       allregs_o[i] = allregs_p[i];
+               }
+       }
+       if (old_io_addr != last_io_addr) {
+               byte = 0xfd;
+               fwrite(&byte, 1, 1, f);
+               fwrite(&last_io_addr, 1, 4, f);
+               old_io_addr = last_io_addr;
+       }
+       io_data = (void *)(psxM + (last_io_addr&0x1ffffc));
+       if (old_io_data != *io_data) {
+               byte = 0xfe;
+               fwrite(&byte, 1, 1, f);
+               fwrite(io_data, 1, 4, f);
+               old_io_data = *io_data;
+       }
+       byte = 0xff;
+       fwrite(&byte, 1, 1, f);
+
+#if 0
+       if (psxRegs.cycle == 190230) {
+               dump_mem("/mnt/ntz/dev/pnd/tmp/psxram_i.dump", psxM, 0x200000);
+               dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs_i.dump", psxH, 0x10000);
+               printf("dumped\n");
+               exit(1);
+       }
+#endif
+}
+
+static const char *regnames[offsetof(psxRegisters, intCycle) / 4] = {
+       "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
+       "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
+       "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
+       "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
+       "lo",  "hi",
+       "C0_0",  "C0_1",  "C0_2",  "C0_3",  "C0_4",  "C0_5",  "C0_6",  "C0_7",
+       "C0_8",  "C0_9",  "C0_10", "C0_11", "C0_12", "C0_13", "C0_14", "C0_15",
+       "C0_16", "C0_17", "C0_18", "C0_19", "C0_20", "C0_21", "C0_22", "C0_23",
+       "C0_24", "C0_25", "C0_26", "C0_27", "C0_28", "C0_29", "C0_30", "C0_31",
+
+       "C2D0",  "C2D1",  "C2D2",  "C2D3",  "C2D4",  "C2D5",  "C2D6",  "C2D7",
+       "C2D8",  "C2D9",  "C2D10", "C2D11", "C2D12", "C2D13", "C2D14", "C2D15",
+       "C2D16", "C2D17", "C2D18", "C2D19", "C2D20", "C2D21", "C2D22", "C2D23",
+       "C2D24", "C2D25", "C2D26", "C2D27", "C2D28", "C2D29", "C2D30", "C2D31",
+
+       "C2C0",  "C2C1",  "C2C2",  "C2C3",  "C2C4",  "C2C5",  "C2C6",  "C2C7",
+       "C2C8",  "C2C9",  "C2C10", "C2C11", "C2C12", "C2C13", "C2C14", "C2C15",
+       "C2C16", "C2C17", "C2C18", "C2C19", "C2C20", "C2C21", "C2C22", "C2C23",
+       "C2C24", "C2C25", "C2C26", "C2C27", "C2C28", "C2C29", "C2C30", "C2C31",
+
+       "PC", "code", "cycle", "interrupt",
+};
+
+void breakme() {}
+
+void do_insn_cmp(void)
+{
+       static psxRegisters rregs;
+       static u32 mem_addr, mem_val;
+       u32 *allregs_p = (void *)&psxRegs;
+       u32 *allregs_e = (void *)&rregs;
+       static u32 ppc, failcount;
+       int i, ret, bad = 0;
+       u8 code;
+
+       if (f == NULL)
+               f = fopen("tracelog", "rb");
+
+       while (1) {
+               if ((ret = fread(&code, 1, 1, f)) <= 0)
+                       break;
+               if (ret <= 0)
+                       break;
+               if (code == 0xff)
+                       break;
+               if (code == 0xfd) {
+                       if ((ret = fread(&mem_addr, 1, 4, f)) <= 0)
+                               break;
+                       continue;
+               }
+               if (code == 0xfe) {
+                       if ((ret = fread(&mem_val, 1, 4, f)) <= 0)
+                               break;
+                       continue;
+               }
+               if ((ret = fread(&allregs_e[code], 1, 4, f)) <= 0)
+                       break;
+       }
+
+       if (ret <= 0) {
+               printf("EOF?\n");
+               goto end;
+       }
+
+       psxRegs.code = rregs.code; // don't care
+psxRegs.cycle = rregs.cycle;
+psxRegs.CP0.r[9] = rregs.CP0.r[9]; // Count
+
+//if (psxRegs.cycle == 166172) breakme();
+//if (psxRegs.cycle > 11296376) printf("pc=%08x %u  %08x\n", psxRegs.pc, psxRegs.cycle, psxRegs.interrupt);
+
+       mem_addr &= 0x1ffffc;
+
+       if (memcmp(&psxRegs, &rregs, offsetof(psxRegisters, intCycle)) == 0 &&
+                       mem_val == *(u32 *)(psxM + mem_addr)
+          ) {
+               failcount = 0;
+               goto ok;
+       }
+
+       for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) {
+               if (allregs_p[i] != allregs_e[i]) {
+                       printf("bad %5s: %08x %08x, pc=%08x, cycle %u\n",
+                               regnames[i], allregs_p[i], allregs_e[i], psxRegs.pc, psxRegs.cycle);
+                       bad++;
+               }
+       }
+
+       if (mem_val != *(u32 *)(psxM + mem_addr)) {
+               printf("bad mem @%08x: %08x %08x\n", mem_addr, *(u32 *)(psxM + mem_addr), mem_val);
+               goto end;
+       }
+
+       if (psxRegs.pc == rregs.pc && bad < 6 && failcount < 32) {
+               printf("-- %d\n", bad);
+               failcount++;
+               goto ok;
+       }
+
+end:
+       printf("PC: %08x/%08x, cycle %u\n", psxRegs.pc, ppc, psxRegs.cycle);
+       dump_mem("/mnt/ntz/dev/pnd/tmp/psxram.dump", psxM, 0x200000);
+       dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs.dump", psxH, 0x10000);
+       exit(1);
+ok:
+       psxRegs.cycle = rregs.cycle + 2; // sync timing
+       ppc = psxRegs.pc;
+}
+
+#endif
index f5e4b55..1657668 100644 (file)
--- a/libpcsxcore/new_dynarec/emu_if.h
+++ b/libpcsxcore/new_dynarec/emu_if.h
@@ -1,3 +1,4 @@
+#include "new_dynarec.h"
 #include "../r3000a.h"
 
 extern char invalid_code[0x100000];
@@ -13,6 +14,7 @@ extern int reg[];
 extern int hi, lo;
 
 /* same as psxRegs.CP0.n.* */
+extern int reg_cop0[];
 #define Status   psxRegs.CP0.n.Status
 #define Cause    psxRegs.CP0.n.Cause
 #define EPC      psxRegs.CP0.n.EPC
@@ -38,10 +40,13 @@ extern unsigned int word;   /* write */
 extern unsigned short hword;
 extern unsigned char byte;
 
-/* cycles */
+/* cycles/irqs */
 extern unsigned int next_interupt;
+extern int pending_exception;
 
 /* called by drc */
-void MFC0();
-void MTC0();
+void MTC0_();
+#define MTC0 MTC0_ /* don't call interpreter with wrong args */
 
+/* misc */
+extern void *psxHLEt_addr;
index f1b0f8c..3418ae9 100644 (file)
--- a/libpcsxcore/new_dynarec/linkage_arm.s
+++ b/libpcsxcore/new_dynarec/linkage_arm.s
@@ -58,6 +58,8 @@ rdram = 0x80000000
        .global memory_map
        /* psx */
        .global psxRegs
+       .global psxHLEt_addr
+       .global code
 
        .bss
        .align  4
@@ -77,10 +79,7 @@ last_count = cycle_count + 4
 pending_exception = last_count + 4
        .type   pending_exception, %object
        .size   pending_exception, 4
-pcaddr = pending_exception + 4
-       .type   pcaddr, %object
-       .size   pcaddr, 4
-stop = pcaddr + 4
+stop = pending_exception + 4
        .type   stop, %object
        .size   stop, 4
 invc_ptr = stop + 4
@@ -118,13 +117,13 @@ psxRegs = reg
        .type   reg, %object
        .size   reg, 128
        .size   psxRegs, psxRegs_end-psxRegs
-hi = reg + 128
-       .type   hi, %object
-       .size   hi, 4
-lo = hi + 4
+lo = reg + 128
        .type   lo, %object
        .size   lo, 4
-reg_cop0 = lo + 4
+hi = lo + 4
+       .type   hi, %object
+       .size   hi, 4
+reg_cop0 = hi + 4
        .type   reg_cop0, %object
        .size   reg_cop0, 128
 reg_cop2d = reg_cop0 + 128
@@ -134,11 +133,13 @@ reg_cop2c = reg_cop2d + 128
        .type   reg_cop2c, %object
        .size   reg_cop2c, 128
 PC = reg_cop2c + 128
+pcaddr = PC
        .type   PC, %object
        .size   PC, 4
 code = PC + 4
        .type   code, %object
        .size   code, 4
+.global cycle
 cycle = code + 4
        .type   cycle, %object
        .size   cycle, 4
@@ -150,7 +151,10 @@ intCycle = interrupt + 4
        .size   intCycle, 128
 psxRegs_end = intCycle + 128
 
-align0 = psxRegs_end /* just for alignment */
+psxHLEt_addr = psxRegs_end
+       .type   psxHLEt_addr, %object
+       .size   psxHLEt_addr, 4
+align0 = psxHLEt_addr + 4 /* just for alignment */
        .type   align0, %object
        .size   align0, 4
 branch_target = align0 + 4
@@ -284,6 +288,7 @@ exec_pagefault:
        bl      get_addr_ht
        mov     pc, r0
        .size   exec_pagefault, .-exec_pagefault
+
 /* Special dynamic linker for the case where a page fault
    may occur in a branch delay slot */
        .global dyna_linker_ds
@@ -386,6 +391,7 @@ dyna_linker_ds:
        .word   jump_dirty
 .htptr:
        .word   hash_table
+
        .align  2
        .global jump_vaddr_r0
        .type   jump_vaddr_r0, %function
@@ -486,6 +492,7 @@ jump_vaddr:
        ldr     r10, [fp, #cycle_count-dynarec_local]
        mov     pc, r0
        .size   jump_vaddr, .-jump_vaddr
+
        .align  2
        .global verify_code_ds
        .type   verify_code_ds, %function
@@ -495,30 +502,6 @@ verify_code_ds:
        .global verify_code_vm
        .type   verify_code_vm, %function
 verify_code_vm:
-       /* r0 = instruction pointer (virtual address) */
-       /* r1 = source (virtual address) */
-       /* r2 = target */
-       /* r3 = length */
-       cmp     r1, #0xC0000000
-       blt     verify_code
-       add     r12, fp, #memory_map-dynarec_local
-       lsr     r4, r1, #12
-       add     r5, r1, r3
-       sub     r5, #1
-       ldr     r6, [r12, r4, lsl #2]
-       lsr     r5, r5, #12
-       movs    r7, r6
-       bmi     .D5
-       add     r1, r1, r6, lsl #2
-       lsl     r6, r6, #2
-.D1:
-       add     r4, r4, #1
-       teq     r6, r7, lsl #2
-       bne     .D5
-       ldr     r7, [r12, r4, lsl #2]
-       cmp     r4, r5
-       bls     .D1
-       .size   verify_code_vm, .-verify_code_vm
        .global verify_code
        .type   verify_code, %function
 verify_code:
@@ -555,6 +538,8 @@ verify_code:
        bl      get_addr
        mov     pc, r0
        .size   verify_code, .-verify_code
+       .size   verify_code_vm, .-verify_code_vm
+
        .align  2
        .global cc_interrupt
        .type   cc_interrupt, %function
@@ -603,8 +588,8 @@ cc_interrupt:
        tst     r5, #31
        bne     .E5
        b       .E1
-
        .size   cc_interrupt, .-cc_interrupt
+
        .align  2
        .global do_interrupt
        .type   do_interrupt, %function
@@ -631,7 +616,7 @@ fp_exception:
        add     r2, r2, #0x2c
        str     r1, [fp, #reg_cop0+48-dynarec_local] /* Status */
        str     r2, [fp, #reg_cop0+52-dynarec_local] /* Cause */
-       add     r0, r3, #0x180
+       add     r0, r3, #0x80
        bl      get_addr_ht
        mov     pc, r0
        .size   fp_exception, .-fp_exception
@@ -642,6 +627,7 @@ fp_exception_ds:
        mov     r2, #0x90000000 /* Set high bit if delay slot */
        b       .E7
        .size   fp_exception_ds, .-fp_exception_ds
+
        .align  2
        .global jump_syscall
        .type   jump_syscall, %function
@@ -653,17 +639,69 @@ jump_syscall:
        mov     r2, #0x20
        str     r1, [fp, #reg_cop0+48-dynarec_local] /* Status */
        str     r2, [fp, #reg_cop0+52-dynarec_local] /* Cause */
-       add     r0, r3, #0x180
+       add     r0, r3, #0x80
        bl      get_addr_ht
        mov     pc, r0
        .size   jump_syscall, .-jump_syscall
+       .align  2
+
+       .align  2
+       .global jump_syscall_hle
+       .type   jump_syscall_hle, %function
+jump_syscall_hle:
+       str     r0, [fp, #pcaddr-dynarec_local] /* PC must be set to EPC for psxException */
+       ldr     r2, [fp, #last_count-dynarec_local]
+       mov     r1, #0    /* in delay slot */
+       add     r2, r2, r10
+       mov     r0, #0x20 /* cause */
+       str     r2, [fp, #cycle-dynarec_local] /* PCSX cycle counter */
+       str     r2, [fp, #reg_cop0+36-dynarec_local] /* Count */
+       bl      psxException
+
+       /* note: psxException might do a recursive recompiler call from its HLE code,
+        * so be ready for this */
+       ldr     r0, [fp, #pcaddr-dynarec_local]
+       mov     r10, #0 /* FIXME */
+       bl      get_addr_ht
+       mov     pc, r0
+       .size   jump_syscall_hle, .-jump_syscall_hle
+
+       .align  2
+       .global jump_hlecall
+       .type   jump_hlecall, %function
+jump_hlecall:
+       ldr     r2, [fp, #last_count-dynarec_local]
+       str     r0, [fp, #pcaddr-dynarec_local]
+       and     r1, r1, #7
+       add     r2, r2, r10
+       ldr     r3, [fp, #psxHLEt_addr-dynarec_local] /* psxHLEt */
+       str     r2, [fp, #cycle-dynarec_local] /* PCSX cycle counter */
+       str     r2, [fp, #reg_cop0+36-dynarec_local] /* Count */
+       mov     lr, pc
+       ldr     pc, [r3, r1, lsl #2]
+
+       ldr     r0, [fp, #pcaddr-dynarec_local]
+       mov     r10, #0 /* FIXME */
+       bl      get_addr_ht
+       mov     pc, r0
+       .size   jump_hlecall, .-jump_hlecall
+
+new_dyna_leave:
+       .align  2
+       .global new_dyna_leave
+       .type   new_dyna_leave, %function
+       ldr     r0, [fp, #last_count-dynarec_local]
+       add     r12, fp, #28
+       add     r10, r0, r10
+       str     r10, [fp, #reg_cop0+36-dynarec_local] /* Count */
+       ldmia   r12, {r4, r5, r6, r7, r8, r9, sl, fp, pc}
+       .size   new_dyna_leave, .-new_dyna_leave
+
        .align  2
        .global indirect_jump_indexed
        .type   indirect_jump_indexed, %function
 indirect_jump_indexed:
        ldr     r0, [r0, r1, lsl #2]
-       .size   indirect_jump_indexed, .-indirect_jump_indexed
-       .align  2
        .global indirect_jump
        .type   indirect_jump, %function
 indirect_jump:
@@ -672,6 +710,8 @@ indirect_jump:
        str     r2, [fp, #reg_cop0+36-dynarec_local] /* Count */
        mov     pc, r0
        .size   indirect_jump, .-indirect_jump
+       .size   indirect_jump_indexed, .-indirect_jump_indexed
+
        .align  2
        .global jump_eret
        .type   jump_eret, %function
@@ -689,26 +729,7 @@ jump_eret:
        subs    r10, r10, r1
        bpl     .E11
 .E8:
-       add     r6, fp, #reg+256-dynarec_local
-       mov     r5, #248
-       mov     r1, #0
-.E9:
-       ldr     r2, [r6, #-8]!
-       ldr     r3, [r6, #4]
-       eor     r3, r3, r2, asr #31
-       subs    r3, r3, #1
-       adc     r1, r1, r1
-       subs    r5, r5, #8
-       bne     .E9
-       ldr     r2, [fp, #hi-dynarec_local]
-       ldr     r3, [fp, #hi+4-dynarec_local]
-       eors    r3, r3, r2, asr #31
-       ldr     r2, [fp, #lo-dynarec_local]
-       ldreq   r3, [fp, #lo+4-dynarec_local]
-       eoreq   r3, r3, r2, asr #31
-       subs    r3, r3, #1
-       adc     r1, r1, r1
-       bl      get_addr_32
+       bl      get_addr
        mov     pc, r0
 .E11:
        str     r0, [fp, #pcaddr-dynarec_local]
@@ -716,6 +737,7 @@ jump_eret:
        ldr     r0, [fp, #pcaddr-dynarec_local]
        b       .E8
        .size   jump_eret, .-jump_eret
+
        .align  2
        .global new_dyna_start
        .type   new_dyna_start, %function
@@ -723,15 +745,18 @@ new_dyna_start:
        ldr     r12, .dlptr
        stmia   r12, {r4, r5, r6, r7, r8, r9, sl, fp, lr}
        sub     fp, r12, #28
-       bl      new_recompile_block
-       ldr     r0, [fp, #next_interupt-dynarec_local]
+       ldr     r0, [fp, #pcaddr-dynarec_local]
+       /*bl    new_recompile_block*/
+       bl      get_addr_ht
+       ldr     r1, [fp, #next_interupt-dynarec_local]
        ldr     r10, [fp, #reg_cop0+36-dynarec_local] /* Count */
-       str     r0, [fp, #last_count-dynarec_local]
-       sub     r10, r10, r0
-       mov     pc, #0x2000000
+       str     r1, [fp, #last_count-dynarec_local]
+       sub     r10, r10, r1
+       mov     pc, r0
 .dlptr:
        .word   dynarec_local+28
        .size   new_dyna_start, .-new_dyna_start
+
        .align  2
        .global write_rdram_new
        .type   write_rdram_new, %function
@@ -761,18 +786,7 @@ write_rdramh_new:
        strh    r0, [r2]
        b       .E12
        .size   write_rdramh_new, .-write_rdramh_new
-       .align  2
-       .global write_rdramd_new
-       .type   write_rdramd_new, %function
-write_rdramd_new:
-       ldr     r2, [fp, #address-dynarec_local]
-/*     ldrd    r0, [fp, #dword-dynarec_local]*/
-       ldr     r0, [fp, #dword-dynarec_local]
-       ldr     r1, [fp, #dword+4-dynarec_local]
-       str     r0, [r2, #4]
-       str     r1, [r2]
-       b       .E12
-       .size   write_rdramd_new, .-write_rdramd_new
+
        .align  2
        .global do_invalidate
        .type   do_invalidate, %function
@@ -786,6 +800,7 @@ do_invalidate:
        beq     invalidate_block
        mov     pc, lr
        .size   do_invalidate, .-do_invalidate
+
        .align  2
        .global read_nomem_new
        .type   read_nomem_new, %function
@@ -808,6 +823,7 @@ read_nomemd_new:
        mov     pc, lr
 */
        .size   read_nomem_new, .-read_nomem_new
+/*
        .align  2
        .global read_nomemb_new
        .type   read_nomemb_new, %function
@@ -828,6 +844,7 @@ write_nomem_new:
        str     r0, [r2, r12, lsl #2]
        mov     pc, lr
        .size   write_nomem_new, .-write_nomem_new
+
        .align  2
        .global write_nomemb_new
        .type   write_nomemb_new, %function
@@ -849,6 +866,7 @@ write_nomemb_new:
        strb    r0, [r2, r12, lsl #2]
        mov     pc, lr
        .size   write_nomemb_new, .-write_nomemb_new
+
        .align  2
        .global write_nomemh_new
        .type   write_nomemh_new, %function
@@ -870,82 +888,7 @@ write_nomemh_new:
        strh    r0, [r2, r12]
        mov     pc, lr
        .size   write_nomemh_new, .-write_nomemh_new
-       .align  2
-       .global write_nomemd_new
-       .type   write_nomemd_new, %function
-write_nomemd_new:
-       str     r3, [fp, #24]
-       str     lr, [fp, #28]
-       bl      do_invalidate
-       ldr     r2, [fp, #address-dynarec_local]
-       add     r12, fp, #memory_map-dynarec_local
-       ldr     lr, [fp, #28]
-       lsr     r0, r2, #12
-       ldr     r3, [fp, #24]
-       ldr     r12, [r12, r0, lsl #2]
-       mov     r1, #0xc
-       lsls    r12, #2
-       bcs     tlb_exception
-       add     r3, r2, #4
-       ldr     r0, [fp, #dword+4-dynarec_local]
-       ldr     r1, [fp, #dword-dynarec_local]
-/*     strd    r0, [r2, r12]*/
-       str     r0, [r2, r12]
-       str     r1, [r3, r12]
-       mov     pc, lr
-       .size   write_nomemd_new, .-write_nomemd_new
-       .align  2
-       .global tlb_exception
-       .type   tlb_exception, %function
-tlb_exception:
-       /* r1 = cause */
-       /* r2 = address */
-       /* r3 = instr addr/flags */
-       ldr     r4, [fp, #reg_cop0+48-dynarec_local] /* Status */
-       add     r5, fp, #memory_map-dynarec_local
-       lsr     r6, r3, #12
-       orr     r1, r1, r3, lsl #31
-       orr     r4, r4, #2
-       ldr     r7, [r5, r6, lsl #2]
-       bic     r8, r3, #3
-       str     r4, [fp, #reg_cop0+48-dynarec_local] /* Status */
-       mov     r6, #0x6000000
-       str     r1, [fp, #reg_cop0+52-dynarec_local] /* Cause */
-       orr     r6, r6, #0x22
-       ldr     r0, [r8, r7, lsl #2]
-       add     r4, r8, r1, asr #29
-       add     r5, fp, #reg-dynarec_local
-       str     r4, [fp, #reg_cop0+56-dynarec_local] /* EPC */
-       mov     r7, #0xf8
-       ldr     r8, [fp, #reg_cop0+16-dynarec_local] /* Context */
-       lsl     r1, r0, #16
-       lsr     r4, r0, #26
-       and     r7, r7, r0, lsr #18
-       mvn     r9, #0xF000000F
-       sub     r2, r2, r1, asr #16
-       bic     r9, r9, #0x0F800000
-       rors    r6, r6, r4
-       mov     r0, #0x80000000
-       ldrcs   r2, [r5, r7]
-       bic     r8, r8, r9
-       tst     r3, #2
-       str     r2, [r5, r7]
-       add     r4, r2, r1, asr #16
-       add     r6, fp, #reg+4-dynarec_local
-       asr     r3, r2, #31
-       str     r4, [fp, #reg_cop0+32-dynarec_local] /* BadVAddr */
-       add     r0, r0, #0x180
-       and     r4, r9, r4, lsr #9
-       strne   r3, [r6, r7]
-       orr     r8, r8, r4
-       str     r8, [fp, #reg_cop0+16-dynarec_local] /* Context */
-       bl      get_addr_ht
-       ldr     r1, [fp, #next_interupt-dynarec_local]
-       ldr     r10, [fp, #reg_cop0+36-dynarec_local] /* Count */
-       str     r1, [fp, #last_count-dynarec_local]
-       sub     r10, r10, r1
-       mov     pc, r0  
-       .size   tlb_exception, .-tlb_exception
+*/
        .align  2
        .global breakpoint
        .type   breakpoint, %function
index 6f7c567..3249e57 100644 (file)
--- a/libpcsxcore/new_dynarec/new_dynarec.c
+++ b/libpcsxcore/new_dynarec/new_dynarec.c
@@ -175,6 +175,7 @@ struct ll_entry
 #define OTHER 23  // Other
 #define SPAN 24   // Branch/delay slot spans 2 pages
 #define NI 25     // Not implemented
+#define HLECALL 26// PCSX fake opcodes for HLE
 
   /* stubs */
 #define CC_STUB 1
@@ -213,7 +214,10 @@ void cc_interrupt();
 void fp_exception();
 void fp_exception_ds();
 void jump_syscall();
+void jump_syscall_hle();
 void jump_eret();
+void jump_hlecall();
+void new_dyna_leave();
 
 // TLB
 void TLBWI_new();
@@ -399,6 +403,9 @@ void *get_addr_ht(u_int vaddr)
 
 void *get_addr_32(u_int vaddr,u_int flags)
 {
+#ifdef FORCE32
+  return get_addr(vaddr);
+#endif
   //printf("TRACE: count=%d next=%d (get_addr_32 %x,flags %x)\n",Count,next_interupt,vaddr,flags);
   int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
   if(ht_bin[0]==vaddr) return (void *)ht_bin[1];
@@ -716,7 +723,7 @@ int needed_again(int r, int i)
       j++;
       break;
     }
-    if(itype[i+j]==SYSCALL||((source[i+j]&0xfc00003f)==0x0d))
+    if(itype[i+j]==SYSCALL||itype[i+j]==HLECALL||((source[i+j]&0xfc00003f)==0x0d))
     {
       break;
     }
@@ -962,14 +969,10 @@ void ll_add(struct ll_entry **head,int vaddr,void *addr)
 // Add virtual address mapping for 32-bit compiled block
 void ll_add_32(struct ll_entry **head,int vaddr,u_int reg32,void *addr)
 {
-  struct ll_entry *new_entry;
-  new_entry=malloc(sizeof(struct ll_entry));
-  assert(new_entry!=NULL);
-  new_entry->vaddr=vaddr;
-  new_entry->reg32=reg32;
-  new_entry->addr=addr;
-  new_entry->next=*head;
-  *head=new_entry;
+  ll_add(head,vaddr,addr);
+#ifndef FORCE32
+  (*head)->reg32=reg32;
+#endif
 }
 
 // Check if an address is already compiled
@@ -1805,6 +1808,7 @@ void delayslot_alloc(struct regstat *current,int i)
     case RJUMP:
     case FJUMP:
     case SYSCALL:
+    case HLECALL:
     case SPAN:
       assem_debug("jump in the delay slot.  this shouldn't happen.\n");//exit(1);
       printf("Disabled speculative precompilation\n");
@@ -3581,7 +3585,18 @@ void syscall_assemble(int i,struct regstat *i_regs)
   assert(!is_delayslot);
   emit_movimm(start+i*4,EAX); // Get PC
   emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right?  There should probably be an extra cycle...
-  emit_jmp((int)jump_syscall);
+  emit_jmp((int)jump_syscall_hle); // XXX
+}
+
+void hlecall_assemble(int i,struct regstat *i_regs) // Emit the code for a PCSX HLE fake opcode (itype HLECALL) at instruction index i
+{
+  signed char ccreg=get_reg(i_regs->regmap,CCREG);
+  assert(ccreg==HOST_CCREG); // the cycle counter must be mapped to its fixed host register
+  assert(!is_delayslot); // must not be assembled as a delay slot
+  emit_movimm(start+i*4+4,0); // host reg 0 = address of the instruction following this one
+  emit_movimm(source[i],1); // host reg 1 = raw opcode word of this instruction
+  emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // XXX: add the cycles counted up to this instruction
+  emit_jmp((int)jump_hlecall); // XXX: hand over to jump_hlecall (defined outside this file)
+}
 
 void ds_assemble(int i,struct regstat *i_regs)
@@ -3621,6 +3636,7 @@ void ds_assemble(int i,struct regstat *i_regs)
     case MOV:
       mov_assemble(i,i_regs);break;
     case SYSCALL:
+    case HLECALL:
     case SPAN:
     case UJUMP:
     case RJUMP:
@@ -4465,6 +4481,7 @@ void ds_assemble_entry(int i)
     case MOV:
       mov_assemble(t,&regs[t]);break;
     case SYSCALL:
+    case HLECALL:
     case SPAN:
     case UJUMP:
     case RJUMP:
@@ -6290,6 +6307,7 @@ static void pagespan_ds()
     case MOV:
       mov_assemble(0,&regs[0]);break;
     case SYSCALL:
+    case HLECALL:
     case SPAN:
     case UJUMP:
     case RJUMP:
@@ -6513,7 +6531,7 @@ void unneeded_registers(int istart,int iend,int r)
         }
       }
     }
-    else if(itype[i]==SYSCALL)
+    else if(itype[i]==SYSCALL||itype[i]==HLECALL)
     {
       // SYSCALL instruction (software interrupt)
       u=1;
@@ -6771,6 +6789,7 @@ static void provisional_32bit()
       case FCOMP:
         break;
       case SYSCALL:
+      case HLECALL:
         break;
       default:
         break;
@@ -6872,7 +6891,7 @@ static void provisional_r32()
         if((regs[i].was32>>dep2[i+1])&1) r32|=1LL<<dep2[i+1];
       }
     }
-    else if(itype[i]==SYSCALL)
+    else if(itype[i]==SYSCALL||itype[i]==HLECALL)
     {
       // SYSCALL instruction (software interrupt)
       r32=0;
@@ -7226,7 +7245,7 @@ void clean_registers(int istart,int iend,int wr)
         }
       }
     }
-    else if(itype[i]==SYSCALL)
+    else if(itype[i]==SYSCALL||itype[i]==HLECALL)
     {
       // SYSCALL instruction (software interrupt)
       will_dirty_i=0;
@@ -7535,6 +7554,24 @@ int new_recompile_block(int addr)
   //rlist();
   start = (u_int)addr&~3;
   //assert(((u_int)addr&1)==0);
+#ifdef PCSX
+  if (Config.HLE && start == 0x80001000) {
+    // XXX: is this enough? Maybe check hleSoftCall?
+    u_int page=get_page(start);
+    ll_add(jump_in+page,start,out);
+    invalid_code[start>>12]=0;
+    emit_movimm(start,0);
+    emit_writeword(0,(int)&pcaddr);
+    emit_jmp((int)new_dyna_leave); // enough??
+    return 0;
+  }
+  else if ((u_int)addr < 0x00200000) {
+    // used for BIOS calls mostly?
+    source = (u_int *)((u_int)rdram+start-0);
+    pagelimit = 0x00200000;
+  }
+  else
+#endif
 #ifdef MUPEN64
   if ((int)addr >= 0xa4000000 && (int)addr < 0xa4001000) {
     source = (u_int *)((u_int)SP_DMEM+start-0xa4000000);
@@ -7855,11 +7892,14 @@ int new_recompile_block(int addr)
       case 0x37: strcpy(insn[i],"LD"); type=LOAD; break;
       case 0x38: strcpy(insn[i],"SC"); type=NI; break;
       case 0x39: strcpy(insn[i],"SWC1"); type=C1LS; break;
+#ifdef PCSX
+      case 0x3B: strcpy(insn[i],"HLECALL"); type=HLECALL; break;
+#endif
       case 0x3C: strcpy(insn[i],"SCD"); type=NI; break;
       case 0x3D: strcpy(insn[i],"SDC1"); type=C1LS; break;
       case 0x3F: strcpy(insn[i],"SD"); type=STORE; break;
       default: strcpy(insn[i],"???"); type=NI;
-        assem_debug("NI %08x @%08x\n", source[i], addr + i*4);
+        printf("NI %08x @%08x\n", source[i], addr + i*4);
         break;
     }
     itype[i]=type;
@@ -8063,6 +8103,7 @@ int new_recompile_block(int addr)
         rt2[i]=0;
         break;
       case SYSCALL:
+      case HLECALL:
         rs1[i]=CCREG;
         rs2[i]=0;
         rt1[i]=0;
@@ -8106,6 +8147,7 @@ int new_recompile_block(int addr)
       if(i>MAXBLOCK/2) done=1;
     }
     if(i>0&&itype[i-1]==SYSCALL&&stop_after_jal) done=1;
+    if(i>0&&itype[i-1]==HLECALL) done=1; // previous insn was an HLE call: mark the block done; i>0 keeps itype[i-1] in bounds on the first instruction
     assert(i<MAXBLOCK-1);
     if(start+i*4==pagelimit-4) done=1;
     assert(start+i*4<pagelimit);
@@ -8704,6 +8746,7 @@ int new_recompile_block(int addr)
           fcomp_alloc(&current,i);
           break;
         case SYSCALL:
+        case HLECALL:
           syscall_alloc(&current,i);
           break;
         case SPAN:
@@ -9069,7 +9112,7 @@ int new_recompile_block(int addr)
 
     // Count cycles in between branches
     ccadj[i]=cc;
-    if(i>0&&(itype[i-1]==RJUMP||itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP||itype[i]==SYSCALL))
+    if(i>0&&(itype[i-1]==RJUMP||itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP||itype[i]==SYSCALL||itype[i]==HLECALL))
     {
       cc=0;
     }
@@ -9168,7 +9211,7 @@ int new_recompile_block(int addr)
         }
       }
     }
-    else if(itype[i]==SYSCALL)
+    else if(itype[i]==SYSCALL||itype[i]==HLECALL)
     {
       // SYSCALL instruction (software interrupt)
       nr=0;
@@ -9930,7 +9973,7 @@ int new_recompile_block(int addr)
         if((regs[i].was32>>dep2[i+1])&1) r32|=1LL<<dep2[i+1];
       }
     }
-    else if(itype[i]==SYSCALL)
+    else if(itype[i]==SYSCALL||itype[i]==HLECALL)
     {
       // SYSCALL instruction (software interrupt)
       r32=0;
@@ -10306,6 +10349,8 @@ int new_recompile_block(int addr)
           mov_assemble(i,&regs[i]);break;
         case SYSCALL:
           syscall_assemble(i,&regs[i]);break;
+        case HLECALL:
+          hlecall_assemble(i,&regs[i]);break;
         case UJUMP:
           ujump_assemble(i,&regs[i]);ds=1;break;
         case RJUMP:
index 8bb0dca..d139052 100644 (file)
@@ -2,3 +2,6 @@
 
 extern int pcaddr;
 extern int pending_exception;
+
+void new_dynarec_init();
+void new_dynarec_cleanup();