From: notaz Date: Tue, 30 Nov 2010 21:56:59 +0000 (+0200) Subject: drc: further hacks, hle handling X-Git-Tag: r1~61 X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=commitdiff_plain;h=7139f3c8070e9aa14fd36c2451d7f10079caa37a;ds=sidebyside drc: further hacks, hle handling --- diff --git a/Makefile b/Makefile index 28b3d7c5..79a8f2de 100644 --- a/Makefile +++ b/Makefile @@ -23,8 +23,10 @@ OBJS += libpcsxcore/cdriso.o libpcsxcore/cdrom.o libpcsxcore/cheat.o libpcsxcore libpcsxcore/psxhw.o libpcsxcore/psxinterpreter.o libpcsxcore/psxmem.o libpcsxcore/r3000a.o \ libpcsxcore/sio.o libpcsxcore/socket.o libpcsxcore/spu.o # dynarec -OBJS += libpcsxcore/new_dynarec/new_dynarec.o libpcsxcore/new_dynarec/linkage_arm.o \ - libpcsxcore/new_dynarec/emu_if.o +ifndef NO_NEW_DRC +OBJS += libpcsxcore/new_dynarec/new_dynarec.o libpcsxcore/new_dynarec/linkage_arm.o +endif +OBJS += libpcsxcore/new_dynarec/emu_if.o libpcsxcore/new_dynarec/new_dynarec.o: libpcsxcore/new_dynarec/assem_arm.c # spu diff --git a/frontend/config.h b/frontend/config.h index e8b255df..768460ff 100644 --- a/frontend/config.h +++ b/frontend/config.h @@ -4,5 +4,5 @@ #define PACKAGE_NAME "pcsx" #define PACKAGE_VERSION "1.9" #define DEF_PLUGIN_DIR "." -#define EMU_LOG printf +//#define EMU_LOG printf #define USEOSS diff --git a/frontend/main.c b/frontend/main.c index 884b6176..fd1c1198 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -87,7 +87,6 @@ int main(int argc, char *argv[]) emuLog = stdout; SetIsoFile(NULL); - Config.PsxOut = 1; // read command line options for (i = 1; i < argc; i++) { @@ -353,19 +352,6 @@ void SysPrintf(const char *fmt, ...) { vsprintf(msg, fmt, list); va_end(list); - if (Config.PsxOut) { - static char linestart = 1; - int l = strlen(msg); - - printf(linestart ? " * %s" : "%s", msg); - - if (l > 0 && msg[l - 1] == '\n') { - linestart = 1; - } else { - linestart = 0; - } - } - fprintf(emuLog, "%s", msg); } diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index ea1da1bc..4bddd8c5 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -3242,12 +3242,11 @@ void cop0_assemble(int i,struct regstat *i_regs) char copr=(source[i]>>11)&0x1f; //assert(t>=0); // Why does this happen? OOT is weird if(t>=0) { -#ifdef MUPEN64 /// FIXME +#ifdef MUPEN64 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0); emit_movimm((source[i]>>11)&0x1f,1); emit_writeword(0,(int)&PC); emit_writebyte(1,(int)&(fake_pc.f.r.nrd)); -#endif if(copr==9) { emit_readword((int)&last_count,ECX); emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc @@ -3257,6 +3256,9 @@ void cop0_assemble(int i,struct regstat *i_regs) } emit_call((int)MFC0); emit_readword((int)&readmem_dword,t); +#else + emit_readword((int)®_cop0+copr*4,t); +#endif } } else if(opcode2[i]==4) // MTC0 @@ -3272,7 +3274,11 @@ void cop0_assemble(int i,struct regstat *i_regs) emit_writeword(0,(int)&PC); emit_writebyte(1,(int)&(fake_pc.f.r.nrd)); #endif - if(copr==9||copr==11||copr==12) { +#ifdef PCSX + emit_movimm(source[i],0); + emit_writeword(0,(int)&psxRegs.code); +#endif + if(copr==9||copr==11||copr==12||copr==13) { emit_readword((int)&last_count,ECX); emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc emit_add(HOST_CCREG,ECX,HOST_CCREG); @@ -3283,7 +3289,7 @@ void cop0_assemble(int i,struct regstat *i_regs) // so needs a special case to handle a pending interrupt. // The interrupt must be taken immediately, because a subsequent // instruction might disable interrupts again. - if(copr==12&&!is_delayslot) { + if(copr==12||copr==13) { emit_movimm(start+i*4+4,0); emit_movimm(0,1); emit_writeword(0,(int)&pcaddr); @@ -3292,7 +3298,7 @@ void cop0_assemble(int i,struct regstat *i_regs) //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12); //else emit_call((int)MTC0); - if(copr==9||copr==11||copr==12) { + if(copr==9||copr==11||copr==12||copr==13) { emit_readword((int)&Count,HOST_CCREG); emit_readword((int)&next_interupt,ECX); emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG); @@ -3300,14 +3306,14 @@ void cop0_assemble(int i,struct regstat *i_regs) emit_writeword(ECX,(int)&last_count); emit_storereg(CCREG,HOST_CCREG); } - if(copr==12) { + if(copr==12||copr==13) { assert(!is_delayslot); emit_readword((int)&pending_exception,14); } emit_loadreg(rs1[i],s); if(get_reg(i_regs->regmap,rs1[i]|64)>=0) emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64)); - if(copr==12) { + if(copr==12||copr==13) { emit_test(14,14); emit_jne((int)&do_interrupt); } diff --git a/libpcsxcore/new_dynarec/assem_arm.h b/libpcsxcore/new_dynarec/assem_arm.h index d97c0f25..f9c9b84c 100644 --- a/libpcsxcore/new_dynarec/assem_arm.h +++ b/libpcsxcore/new_dynarec/assem_arm.h @@ -12,6 +12,7 @@ //#define MUPEN64 #define FORCE32 1 #define DISABLE_COP1 1 +#define PCSX 1 #ifdef FORCE32 #define REG_SHIFT 2 @@ -45,7 +46,7 @@ extern char *invc_ptr; -#define BASE_ADDR 0x2000000 // Code generator target address +#define BASE_ADDR 0x1000000 // Code generator target address #define TARGET_SIZE_2 24 // 2^24 = 16 megabytes // This is defined in linkage_arm.s, but gcc -O3 likes this better diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index cee61cce..d0b991e2 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -1,37 +1,96 @@ +// cycles after syscall/hle? +// pending_exception? #include #include "emu_if.h" +#include "../psxmem.h" +#include "../psxhle.h" + +//#define memprintf printf +#define memprintf(...) +//#define evprintf printf +#define evprintf(...) + +//#define DRC_DBG char invalid_code[0x100000]; -void MFC0(void) +void MTC0_() { - printf("MFC0!\n"); + extern void psxMTC0(); + + printf("ari64 MTC0 %08x\n", psxRegs.code); + psxMTC0(); + pending_exception = 1; /* FIXME? */ } void gen_interupt() { - printf("gen_interupt\n"); + evprintf("ari64_gen_interupt\n"); + evprintf(" +ge %08x, %d->%d\n", psxRegs.pc, Count, next_interupt); +#ifdef DRC_DBG + psxRegs.cycle += 2; +#else + psxRegs.cycle = Count; // stupid +#endif + + psxBranchTest(); + + if (psxRegs.cycle != Count) { + printf("psxRegs.cycle != Count: %d != %d\n", psxRegs.cycle, Count); + Count = psxRegs.cycle; + } + + next_interupt = Count + psxNextCounter; + evprintf(" -ge %08x, %d->%d\n", psxRegs.pc, Count, next_interupt); + + pending_exception = 1; /* FIXME */ } void check_interupt() { - printf("check_interupt\n"); + printf("ari64_check_interupt\n"); } void read_nomem_new() { - printf("read_nomem_new\n"); + printf("ari64_read_nomem_new\n"); +} + +static void read_mem8() +{ + memprintf("ari64_read_mem8 %08x, PC~=%08x\n", address, psxRegs.pc); + readmem_word = psxMemRead8(address) & 0xff; +} + +static void read_mem16() +{ + memprintf("ari64_read_mem16 %08x, PC~=%08x\n", address, psxRegs.pc); + readmem_word = psxMemRead16(address) & 0xffff; +} + +static void read_mem32() +{ + memprintf("ari64_read_mem32 %08x, PC~=%08x\n", address, psxRegs.pc); + readmem_word = psxMemRead32(address); +} + +static void write_mem8() +{ + memprintf("ari64_write_mem8 %08x, %02x, PC~=%08x\n", address, byte, psxRegs.pc); + psxMemWrite8(address, byte); } -static void read_mem() +static void write_mem16() { - printf("read_mem %08x\n", address); + memprintf("ari64_write_mem16 %08x, %04x, PC~=%08x\n", address, hword, psxRegs.pc); + psxMemWrite16(address, hword); } -static void write_mem() +static void write_mem32() { - printf("write_mem %08x\n", address); + memprintf("ari64_write_mem32 %08x, %08x, PC~=%08x\n", address, word, psxRegs.pc); + psxMemWrite32(address, word); } void (*readmem[0x10000])(); @@ -48,13 +107,15 @@ static int ari64_init() new_dynarec_init(); for (i = 0; i < sizeof(readmem) / sizeof(readmem[0]); i++) { - readmem[i] = read_mem; - writemem[i] = write_mem; + readmemb[i] = read_mem8; + readmemh[i] = read_mem16; + readmem[i] = read_mem32; + writememb[i] = write_mem8; + writememh[i] = write_mem16; + writemem[i] = write_mem32; } - memcpy(readmemb, readmem, sizeof(readmem)); - memcpy(readmemh, readmem, sizeof(readmem)); - memcpy(writememb, writemem, sizeof(writemem)); - memcpy(writememh, writemem, sizeof(writemem)); + + psxHLEt_addr = (void *)psxHLEt; } static void ari64_reset() @@ -65,15 +126,18 @@ static void ari64_reset() static void ari64_execute() { -/* - FILE *f = fopen("/mnt/ntz/dev/pnd/tmp/ram.dump", "wb"); - fwrite((void *)0x80000000, 1, 0x200000, f); - fclose(f); - exit(1); -*/ - printf("psxNextsCounter %d, psxNextCounter %d\n", psxNextsCounter, psxNextCounter); - printf("ari64_execute %08x\n", psxRegs.pc); + /* TODO: get rid of this cycle counter copying */ + Count = psxRegs.cycle; + next_interupt = Count + psxNextCounter; + + evprintf("psxNextsCounter %d, psxNextCounter %d\n", psxNextsCounter, psxNextCounter); + evprintf("ari64_execute %08x, %d->%d\n", psxRegs.pc, Count, next_interupt); new_dyna_start(psxRegs.pc); + evprintf("ari64_execute end %08x, %d->%d\n", psxRegs.pc, Count, next_interupt); + +#ifndef DRC_DBG + psxRegs.cycle = Count; +#endif } static void ari64_clear(u32 Addr, u32 Size) @@ -85,11 +149,217 @@ static void ari64_shutdown() new_dynarec_cleanup(); } +extern void intExecute(); +extern void intExecuteT(); +extern void intExecuteBlock(); +extern void intExecuteBlockT(); +#ifndef DRC_DBG +#define intExecuteT intExecute +#define intExecuteBlockT intExecuteBlock +#endif + R3000Acpu psxRec = { ari64_init, ari64_reset, +#if 1 ari64_execute, -// TODO recExecuteBlock, + ari64_execute, +#else + intExecuteT, + intExecuteBlockT, +#endif ari64_clear, ari64_shutdown }; + +// TODO: rm +#ifndef DRC_DBG +void do_insn_trace() {} +void do_insn_cmp() {} +#endif + +#if defined(__x86_64__) || defined(__i386__) +unsigned int address, readmem_word, word; +unsigned short hword; +unsigned char byte; +int pending_exception; +unsigned int next_interupt; +void *psxHLEt_addr; +void new_dynarec_init() {} +int new_dyna_start() {} +void new_dynarec_cleanup() {} +#endif + +#ifdef DRC_DBG + +#include +static FILE *f; +extern u32 last_io_addr; + +static void dump_mem(const char *fname, void *mem, size_t size) +{ + FILE *f1 = fopen(fname, "wb"); + fwrite(mem, 1, size, f1); + fclose(f1); +} + +void do_insn_trace(void) +{ + static psxRegisters oldregs; + static u32 old_io_addr = (u32)-1; + static u32 old_io_data = 0xbad0c0de; + u32 *allregs_p = (void *)&psxRegs; + u32 *allregs_o = (void *)&oldregs; + u32 *io_data; + int i; + u8 byte; + +//last_io_addr = 0x5e2c8; + if (f == NULL) + f = fopen("tracelog", "wb"); + + oldregs.code = psxRegs.code; // don't care + for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) { + if (allregs_p[i] != allregs_o[i]) { + fwrite(&i, 1, 1, f); + fwrite(&allregs_p[i], 1, 4, f); + allregs_o[i] = allregs_p[i]; + } + } + if (old_io_addr != last_io_addr) { + byte = 0xfd; + fwrite(&byte, 1, 1, f); + fwrite(&last_io_addr, 1, 4, f); + old_io_addr = last_io_addr; + } + io_data = (void *)(psxM + (last_io_addr&0x1ffffc)); + if (old_io_data != *io_data) { + byte = 0xfe; + fwrite(&byte, 1, 1, f); + fwrite(io_data, 1, 4, f); + old_io_data = *io_data; + } + byte = 0xff; + fwrite(&byte, 1, 1, f); + +#if 0 + if (psxRegs.cycle == 190230) { + dump_mem("/mnt/ntz/dev/pnd/tmp/psxram_i.dump", psxM, 0x200000); + dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs_i.dump", psxH, 0x10000); + printf("dumped\n"); + exit(1); + } +#endif +} + +static const char *regnames[offsetof(psxRegisters, intCycle) / 4] = { + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", + "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", + "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", + "lo", "hi", + "C0_0", "C0_1", "C0_2", "C0_3", "C0_4", "C0_5", "C0_6", "C0_7", + "C0_8", "C0_9", "C0_10", "C0_11", "C0_12", "C0_13", "C0_14", "C0_15", + "C0_16", "C0_17", "C0_18", "C0_19", "C0_20", "C0_21", "C0_22", "C0_23", + "C0_24", "C0_25", "C0_26", "C0_27", "C0_28", "C0_29", "C0_30", "C0_31", + + "C2D0", "C2D1", "C2D2", "C2D3", "C2D4", "C2D5", "C2D6", "C2D7", + "C2D8", "C2D9", "C2D10", "C2D11", "C2D12", "C2D13", "C2D14", "C2D15", + "C2D16", "C2D17", "C2D18", "C2D19", "C2D20", "C2D21", "C2D22", "C2D23", + "C2D24", "C2D25", "C2D26", "C2D27", "C2D28", "C2D29", "C2D30", "C2D31", + + "C2C0", "C2C1", "C2C2", "C2C3", "C2C4", "C2C5", "C2C6", "C2C7", + "C2C8", "C2C9", "C2C10", "C2C11", "C2C12", "C2C13", "C2C14", "C2C15", + "C2C16", "C2C17", "C2C18", "C2C19", "C2C20", "C2C21", "C2C22", "C2C23", + "C2C24", "C2C25", "C2C26", "C2C27", "C2C28", "C2C29", "C2C30", "C2C31", + + "PC", "code", "cycle", "interrupt", +}; + +void breakme() {} + +void do_insn_cmp(void) +{ + static psxRegisters rregs; + static u32 mem_addr, mem_val; + u32 *allregs_p = (void *)&psxRegs; + u32 *allregs_e = (void *)&rregs; + static u32 ppc, failcount; + int i, ret, bad = 0; + u8 code; + + if (f == NULL) + f = fopen("tracelog", "rb"); + + while (1) { + if ((ret = fread(&code, 1, 1, f)) <= 0) + break; + if (ret <= 0) + break; + if (code == 0xff) + break; + if (code == 0xfd) { + if ((ret = fread(&mem_addr, 1, 4, f)) <= 0) + break; + continue; + } + if (code == 0xfe) { + if ((ret = fread(&mem_val, 1, 4, f)) <= 0) + break; + continue; + } + if ((ret = fread(&allregs_e[code], 1, 4, f)) <= 0) + break; + } + + if (ret <= 0) { + printf("EOF?\n"); + goto end; + } + + psxRegs.code = rregs.code; // don't care +psxRegs.cycle = rregs.cycle; +psxRegs.CP0.r[9] = rregs.CP0.r[9]; // Count + +//if (psxRegs.cycle == 166172) breakme(); +//if (psxRegs.cycle > 11296376) printf("pc=%08x %u %08x\n", psxRegs.pc, psxRegs.cycle, psxRegs.interrupt); + + mem_addr &= 0x1ffffc; + + if (memcmp(&psxRegs, &rregs, offsetof(psxRegisters, intCycle)) == 0 && + mem_val == *(u32 *)(psxM + mem_addr) + ) { + failcount = 0; + goto ok; + } + + for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) { + if (allregs_p[i] != allregs_e[i]) { + printf("bad %5s: %08x %08x, pc=%08x, cycle %u\n", + regnames[i], allregs_p[i], allregs_e[i], psxRegs.pc, psxRegs.cycle); + bad++; + } + } + + if (mem_val != *(u32 *)(psxM + mem_addr)) { + printf("bad mem @%08x: %08x %08x\n", mem_addr, *(u32 *)(psxM + mem_addr), mem_val); + goto end; + } + + if (psxRegs.pc == rregs.pc && bad < 6 && failcount < 32) { + printf("-- %d\n", bad); + failcount++; + goto ok; + } + +end: + printf("PC: %08x/%08x, cycle %u\n", psxRegs.pc, ppc, psxRegs.cycle); + dump_mem("/mnt/ntz/dev/pnd/tmp/psxram.dump", psxM, 0x200000); + dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs.dump", psxH, 0x10000); + exit(1); +ok: + psxRegs.cycle = rregs.cycle + 2; // sync timing + ppc = psxRegs.pc; +} + +#endif diff --git a/libpcsxcore/new_dynarec/emu_if.h b/libpcsxcore/new_dynarec/emu_if.h index f5e4b553..1657668a 100644 --- a/libpcsxcore/new_dynarec/emu_if.h +++ b/libpcsxcore/new_dynarec/emu_if.h @@ -1,3 +1,4 @@ +#include "new_dynarec.h" #include "../r3000a.h" extern char invalid_code[0x100000]; @@ -13,6 +14,7 @@ extern int reg[]; extern int hi, lo; /* same as psxRegs.CP0.n.* */ +extern int reg_cop0[]; #define Status psxRegs.CP0.n.Status #define Cause psxRegs.CP0.n.Cause #define EPC psxRegs.CP0.n.EPC @@ -38,10 +40,13 @@ extern unsigned int word; /* write */ extern unsigned short hword; extern unsigned char byte; -/* cycles */ +/* cycles/irqs */ extern unsigned int next_interupt; +extern int pending_exception; /* called by drc */ -void MFC0(); -void MTC0(); +void MTC0_(); +#define MTC0 MTC0_ /* don't call interpreter with wrong args */ +/* misc */ +extern void *psxHLEt_addr; diff --git a/libpcsxcore/new_dynarec/linkage_arm.s b/libpcsxcore/new_dynarec/linkage_arm.s index f1b0f8cf..3418ae9a 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.s +++ b/libpcsxcore/new_dynarec/linkage_arm.s @@ -58,6 +58,8 @@ rdram = 0x80000000 .global memory_map /* psx */ .global psxRegs + .global psxHLEt_addr + .global code .bss .align 4 @@ -77,10 +79,7 @@ last_count = cycle_count + 4 pending_exception = last_count + 4 .type pending_exception, %object .size pending_exception, 4 -pcaddr = pending_exception + 4 - .type pcaddr, %object - .size pcaddr, 4 -stop = pcaddr + 4 +stop = pending_exception + 4 .type stop, %object .size stop, 4 invc_ptr = stop + 4 @@ -118,13 +117,13 @@ psxRegs = reg .type reg, %object .size reg, 128 .size psxRegs, psxRegs_end-psxRegs -hi = reg + 128 - .type hi, %object - .size hi, 4 -lo = hi + 4 +lo = reg + 128 .type lo, %object .size lo, 4 -reg_cop0 = lo + 4 +hi = lo + 4 + .type hi, %object + .size hi, 4 +reg_cop0 = hi + 4 .type reg_cop0, %object .size reg_cop0, 128 reg_cop2d = reg_cop0 + 128 @@ -134,11 +133,13 @@ reg_cop2c = reg_cop2d + 128 .type reg_cop2c, %object .size reg_cop2c, 128 PC = reg_cop2c + 128 +pcaddr = PC .type PC, %object .size PC, 4 code = PC + 4 .type code, %object .size code, 4 +.global cycle cycle = code + 4 .type cycle, %object .size cycle, 4 @@ -150,7 +151,10 @@ intCycle = interrupt + 4 .size intCycle, 128 psxRegs_end = intCycle + 128 -align0 = psxRegs_end /* just for alignment */ +psxHLEt_addr = psxRegs_end + .type psxHLEt_addr, %object + .size psxHLEt_addr, 4 +align0 = psxHLEt_addr + 4 /* just for alignment */ .type align0, %object .size align0, 4 branch_target = align0 + 4 @@ -284,6 +288,7 @@ exec_pagefault: bl get_addr_ht mov pc, r0 .size exec_pagefault, .-exec_pagefault + /* Special dynamic linker for the case where a page fault may occur in a branch delay slot */ .global dyna_linker_ds @@ -386,6 +391,7 @@ dyna_linker_ds: .word jump_dirty .htptr: .word hash_table + .align 2 .global jump_vaddr_r0 .type jump_vaddr_r0, %function @@ -486,6 +492,7 @@ jump_vaddr: ldr r10, [fp, #cycle_count-dynarec_local] mov pc, r0 .size jump_vaddr, .-jump_vaddr + .align 2 .global verify_code_ds .type verify_code_ds, %function @@ -495,30 +502,6 @@ verify_code_ds: .global verify_code_vm .type verify_code_vm, %function verify_code_vm: - /* r0 = instruction pointer (virtual address) */ - /* r1 = source (virtual address) */ - /* r2 = target */ - /* r3 = length */ - cmp r1, #0xC0000000 - blt verify_code - add r12, fp, #memory_map-dynarec_local - lsr r4, r1, #12 - add r5, r1, r3 - sub r5, #1 - ldr r6, [r12, r4, lsl #2] - lsr r5, r5, #12 - movs r7, r6 - bmi .D5 - add r1, r1, r6, lsl #2 - lsl r6, r6, #2 -.D1: - add r4, r4, #1 - teq r6, r7, lsl #2 - bne .D5 - ldr r7, [r12, r4, lsl #2] - cmp r4, r5 - bls .D1 - .size verify_code_vm, .-verify_code_vm .global verify_code .type verify_code, %function verify_code: @@ -555,6 +538,8 @@ verify_code: bl get_addr mov pc, r0 .size verify_code, .-verify_code + .size verify_code_vm, .-verify_code_vm + .align 2 .global cc_interrupt .type cc_interrupt, %function @@ -603,8 +588,8 @@ cc_interrupt: tst r5, #31 bne .E5 b .E1 - .size cc_interrupt, .-cc_interrupt + .align 2 .global do_interrupt .type do_interrupt, %function @@ -631,7 +616,7 @@ fp_exception: add r2, r2, #0x2c str r1, [fp, #reg_cop0+48-dynarec_local] /* Status */ str r2, [fp, #reg_cop0+52-dynarec_local] /* Cause */ - add r0, r3, #0x180 + add r0, r3, #0x80 bl get_addr_ht mov pc, r0 .size fp_exception, .-fp_exception @@ -642,6 +627,7 @@ fp_exception_ds: mov r2, #0x90000000 /* Set high bit if delay slot */ b .E7 .size fp_exception_ds, .-fp_exception_ds + .align 2 .global jump_syscall .type jump_syscall, %function @@ -653,17 +639,69 @@ jump_syscall: mov r2, #0x20 str r1, [fp, #reg_cop0+48-dynarec_local] /* Status */ str r2, [fp, #reg_cop0+52-dynarec_local] /* Cause */ - add r0, r3, #0x180 + add r0, r3, #0x80 bl get_addr_ht mov pc, r0 .size jump_syscall, .-jump_syscall + .align 2 + + .align 2 + .global jump_syscall_hle + .type jump_syscall_hle, %function +jump_syscall_hle: + str r0, [fp, #pcaddr-dynarec_local] /* PC must be set to EPC for psxException */ + ldr r2, [fp, #last_count-dynarec_local] + mov r1, #0 /* in delay slot */ + add r2, r2, r10 + mov r0, #0x20 /* cause */ + str r2, [fp, #cycle-dynarec_local] /* PCSX cycle counter */ + str r2, [fp, #reg_cop0+36-dynarec_local] /* Count */ + bl psxException + + /* note: psxException might do recorsive recompiler call from it's HLE code, + * so be ready for this */ + ldr r0, [fp, #pcaddr-dynarec_local] + mov r10, #0 /* FIXME */ + bl get_addr_ht + mov pc, r0 + .size jump_syscall_hle, .-jump_syscall_hle + + .align 2 + .global jump_hlecall + .type jump_hlecall, %function +jump_hlecall: + ldr r2, [fp, #last_count-dynarec_local] + str r0, [fp, #pcaddr-dynarec_local] + and r1, r1, #7 + add r2, r2, r10 + ldr r3, [fp, #psxHLEt_addr-dynarec_local] /* psxHLEt */ + str r2, [fp, #cycle-dynarec_local] /* PCSX cycle counter */ + str r2, [fp, #reg_cop0+36-dynarec_local] /* Count */ + mov lr, pc + ldr pc, [r3, r1, lsl #2] + + ldr r0, [fp, #pcaddr-dynarec_local] + mov r10, #0 /* FIXME */ + bl get_addr_ht + mov pc, r0 + .size jump_hlecall, .-jump_hlecall + +new_dyna_leave: + .align 2 + .global new_dyna_leave + .type new_dyna_leave, %function + ldr r0, [fp, #last_count-dynarec_local] + add r12, fp, #28 + add r10, r0, r10 + str r10, [fp, #reg_cop0+36-dynarec_local] /* Count */ + ldmia r12, {r4, r5, r6, r7, r8, r9, sl, fp, pc} + .size new_dyna_leave, .-new_dyna_leave + .align 2 .global indirect_jump_indexed .type indirect_jump_indexed, %function indirect_jump_indexed: ldr r0, [r0, r1, lsl #2] - .size indirect_jump_indexed, .-indirect_jump_indexed - .align 2 .global indirect_jump .type indirect_jump, %function indirect_jump: @@ -672,6 +710,8 @@ indirect_jump: str r2, [fp, #reg_cop0+36-dynarec_local] /* Count */ mov pc, r0 .size indirect_jump, .-indirect_jump + .size indirect_jump_indexed, .-indirect_jump_indexed + .align 2 .global jump_eret .type jump_eret, %function @@ -689,26 +729,7 @@ jump_eret: subs r10, r10, r1 bpl .E11 .E8: - add r6, fp, #reg+256-dynarec_local - mov r5, #248 - mov r1, #0 -.E9: - ldr r2, [r6, #-8]! - ldr r3, [r6, #4] - eor r3, r3, r2, asr #31 - subs r3, r3, #1 - adc r1, r1, r1 - subs r5, r5, #8 - bne .E9 - ldr r2, [fp, #hi-dynarec_local] - ldr r3, [fp, #hi+4-dynarec_local] - eors r3, r3, r2, asr #31 - ldr r2, [fp, #lo-dynarec_local] - ldreq r3, [fp, #lo+4-dynarec_local] - eoreq r3, r3, r2, asr #31 - subs r3, r3, #1 - adc r1, r1, r1 - bl get_addr_32 + bl get_addr mov pc, r0 .E11: str r0, [fp, #pcaddr-dynarec_local] @@ -716,6 +737,7 @@ jump_eret: ldr r0, [fp, #pcaddr-dynarec_local] b .E8 .size jump_eret, .-jump_eret + .align 2 .global new_dyna_start .type new_dyna_start, %function @@ -723,15 +745,18 @@ new_dyna_start: ldr r12, .dlptr stmia r12, {r4, r5, r6, r7, r8, r9, sl, fp, lr} sub fp, r12, #28 - bl new_recompile_block - ldr r0, [fp, #next_interupt-dynarec_local] + ldr r0, [fp, #pcaddr-dynarec_local] + /*bl new_recompile_block*/ + bl get_addr_ht + ldr r1, [fp, #next_interupt-dynarec_local] ldr r10, [fp, #reg_cop0+36-dynarec_local] /* Count */ - str r0, [fp, #last_count-dynarec_local] - sub r10, r10, r0 - mov pc, #0x2000000 + str r1, [fp, #last_count-dynarec_local] + sub r10, r10, r1 + mov pc, r0 .dlptr: .word dynarec_local+28 .size new_dyna_start, .-new_dyna_start + .align 2 .global write_rdram_new .type write_rdram_new, %function @@ -761,18 +786,7 @@ write_rdramh_new: strh r0, [r2] b .E12 .size write_rdramh_new, .-write_rdramh_new - .align 2 - .global write_rdramd_new - .type write_rdramd_new, %function -write_rdramd_new: - ldr r2, [fp, #address-dynarec_local] -/* ldrd r0, [fp, #dword-dynarec_local]*/ - ldr r0, [fp, #dword-dynarec_local] - ldr r1, [fp, #dword+4-dynarec_local] - str r0, [r2, #4] - str r1, [r2] - b .E12 - .size write_rdramd_new, .-write_rdramd_new + .align 2 .global do_invalidate .type do_invalidate, %function @@ -786,6 +800,7 @@ do_invalidate: beq invalidate_block mov pc, lr .size do_invalidate, .-do_invalidate + .align 2 .global read_nomem_new .type read_nomem_new, %function @@ -808,6 +823,7 @@ read_nomemd_new: mov pc, lr */ .size read_nomem_new, .-read_nomem_new +/* .align 2 .global read_nomemb_new .type read_nomemb_new, %function @@ -828,6 +844,7 @@ write_nomem_new: str r0, [r2, r12, lsl #2] mov pc, lr .size write_nomem_new, .-write_nomem_new + .align 2 .global write_nomemb_new .type write_nomemb_new, %function @@ -849,6 +866,7 @@ write_nomemb_new: strb r0, [r2, r12, lsl #2] mov pc, lr .size write_nomemb_new, .-write_nomemb_new + .align 2 .global write_nomemh_new .type write_nomemh_new, %function @@ -870,82 +888,7 @@ write_nomemh_new: strh r0, [r2, r12] mov pc, lr .size write_nomemh_new, .-write_nomemh_new - .align 2 - .global write_nomemd_new - .type write_nomemd_new, %function -write_nomemd_new: - str r3, [fp, #24] - str lr, [fp, #28] - bl do_invalidate - ldr r2, [fp, #address-dynarec_local] - add r12, fp, #memory_map-dynarec_local - ldr lr, [fp, #28] - lsr r0, r2, #12 - ldr r3, [fp, #24] - ldr r12, [r12, r0, lsl #2] - mov r1, #0xc - lsls r12, #2 - bcs tlb_exception - add r3, r2, #4 - ldr r0, [fp, #dword+4-dynarec_local] - ldr r1, [fp, #dword-dynarec_local] -/* strd r0, [r2, r12]*/ - str r0, [r2, r12] - str r1, [r3, r12] - mov pc, lr - .size write_nomemd_new, .-write_nomemd_new - .align 2 - .global tlb_exception - .type tlb_exception, %function -tlb_exception: - /* r1 = cause */ - /* r2 = address */ - /* r3 = instr addr/flags */ - ldr r4, [fp, #reg_cop0+48-dynarec_local] /* Status */ - add r5, fp, #memory_map-dynarec_local - lsr r6, r3, #12 - orr r1, r1, r3, lsl #31 - orr r4, r4, #2 - ldr r7, [r5, r6, lsl #2] - bic r8, r3, #3 - str r4, [fp, #reg_cop0+48-dynarec_local] /* Status */ - mov r6, #0x6000000 - str r1, [fp, #reg_cop0+52-dynarec_local] /* Cause */ - orr r6, r6, #0x22 - ldr r0, [r8, r7, lsl #2] - add r4, r8, r1, asr #29 - add r5, fp, #reg-dynarec_local - str r4, [fp, #reg_cop0+56-dynarec_local] /* EPC */ - mov r7, #0xf8 - ldr r8, [fp, #reg_cop0+16-dynarec_local] /* Context */ - lsl r1, r0, #16 - lsr r4, r0, #26 - and r7, r7, r0, lsr #18 - mvn r9, #0xF000000F - sub r2, r2, r1, asr #16 - bic r9, r9, #0x0F800000 - rors r6, r6, r4 - mov r0, #0x80000000 - ldrcs r2, [r5, r7] - bic r8, r8, r9 - tst r3, #2 - str r2, [r5, r7] - add r4, r2, r1, asr #16 - add r6, fp, #reg+4-dynarec_local - asr r3, r2, #31 - str r4, [fp, #reg_cop0+32-dynarec_local] /* BadVAddr */ - add r0, r0, #0x180 - and r4, r9, r4, lsr #9 - strne r3, [r6, r7] - orr r8, r8, r4 - str r8, [fp, #reg_cop0+16-dynarec_local] /* Context */ - bl get_addr_ht - ldr r1, [fp, #next_interupt-dynarec_local] - ldr r10, [fp, #reg_cop0+36-dynarec_local] /* Count */ - str r1, [fp, #last_count-dynarec_local] - sub r10, r10, r1 - mov pc, r0 - .size tlb_exception, .-tlb_exception +*/ .align 2 .global breakpoint .type breakpoint, %function diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 6f7c5673..3249e574 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -175,6 +175,7 @@ struct ll_entry #define OTHER 23 // Other #define SPAN 24 // Branch/delay slot spans 2 pages #define NI 25 // Not implemented +#define HLECALL 26// PCSX fake opcodes for HLE /* stubs */ #define CC_STUB 1 @@ -213,7 +214,10 @@ void cc_interrupt(); void fp_exception(); void fp_exception_ds(); void jump_syscall(); +void jump_syscall_hle(); void jump_eret(); +void jump_hlecall(); +void new_dyna_leave(); // TLB void TLBWI_new(); @@ -399,6 +403,9 @@ void *get_addr_ht(u_int vaddr) void *get_addr_32(u_int vaddr,u_int flags) { +#ifdef FORCE32 + return get_addr(vaddr); +#endif //printf("TRACE: count=%d next=%d (get_addr_32 %x,flags %x)\n",Count,next_interupt,vaddr,flags); int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF]; if(ht_bin[0]==vaddr) return (void *)ht_bin[1]; @@ -716,7 +723,7 @@ int needed_again(int r, int i) j++; break; } - if(itype[i+j]==SYSCALL||((source[i+j]&0xfc00003f)==0x0d)) + if(itype[i+j]==SYSCALL||itype[i+j]==HLECALL||((source[i+j]&0xfc00003f)==0x0d)) { break; } @@ -962,14 +969,10 @@ void ll_add(struct ll_entry **head,int vaddr,void *addr) // Add virtual address mapping for 32-bit compiled block void ll_add_32(struct ll_entry **head,int vaddr,u_int reg32,void *addr) { - struct ll_entry *new_entry; - new_entry=malloc(sizeof(struct ll_entry)); - assert(new_entry!=NULL); - new_entry->vaddr=vaddr; - new_entry->reg32=reg32; - new_entry->addr=addr; - new_entry->next=*head; - *head=new_entry; + ll_add(head,vaddr,addr); +#ifndef FORCE32 + (*head)->reg32=reg32; +#endif } // Check if an address is already compiled @@ -1805,6 +1808,7 @@ void delayslot_alloc(struct regstat *current,int i) case RJUMP: case FJUMP: case SYSCALL: + case HLECALL: case SPAN: assem_debug("jump in the delay slot. this shouldn't happen.\n");//exit(1); printf("Disabled speculative precompilation\n"); @@ -3581,7 +3585,18 @@ void syscall_assemble(int i,struct regstat *i_regs) assert(!is_delayslot); emit_movimm(start+i*4,EAX); // Get PC emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle... - emit_jmp((int)jump_syscall); + emit_jmp((int)jump_syscall_hle); // XXX +} + +void hlecall_assemble(int i,struct regstat *i_regs) +{ + signed char ccreg=get_reg(i_regs->regmap,CCREG); + assert(ccreg==HOST_CCREG); + assert(!is_delayslot); + emit_movimm(start+i*4+4,0); // Get PC + emit_movimm(source[i],1); // opcode + emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // XXX + emit_jmp((int)jump_hlecall); // XXX } void ds_assemble(int i,struct regstat *i_regs) @@ -3621,6 +3636,7 @@ void ds_assemble(int i,struct regstat *i_regs) case MOV: mov_assemble(i,i_regs);break; case SYSCALL: + case HLECALL: case SPAN: case UJUMP: case RJUMP: @@ -4465,6 +4481,7 @@ void ds_assemble_entry(int i) case MOV: mov_assemble(t,®s[t]);break; case SYSCALL: + case HLECALL: case SPAN: case UJUMP: case RJUMP: @@ -6290,6 +6307,7 @@ static void pagespan_ds() case MOV: mov_assemble(0,®s[0]);break; case SYSCALL: + case HLECALL: case SPAN: case UJUMP: case RJUMP: @@ -6513,7 +6531,7 @@ void unneeded_registers(int istart,int iend,int r) } } } - else if(itype[i]==SYSCALL) + else if(itype[i]==SYSCALL||itype[i]==HLECALL) { // SYSCALL instruction (software interrupt) u=1; @@ -6771,6 +6789,7 @@ static void provisional_32bit() case FCOMP: break; case SYSCALL: + case HLECALL: break; default: break; @@ -6872,7 +6891,7 @@ static void provisional_r32() if((regs[i].was32>>dep2[i+1])&1) r32|=1LL<>12]=0; + emit_movimm(start,0); + emit_writeword(0,(int)&pcaddr); + emit_jmp((int)new_dyna_leave); // enough?? + return 0; + } + else if ((u_int)addr < 0x00200000) { + // used for BIOS calls mostly? + source = (u_int *)((u_int)rdram+start-0); + pagelimit = 0x00200000; + } + else +#endif #ifdef MUPEN64 if ((int)addr >= 0xa4000000 && (int)addr < 0xa4001000) { source = (u_int *)((u_int)SP_DMEM+start-0xa4000000); @@ -7855,11 +7892,14 @@ int new_recompile_block(int addr) case 0x37: strcpy(insn[i],"LD"); type=LOAD; break; case 0x38: strcpy(insn[i],"SC"); type=NI; break; case 0x39: strcpy(insn[i],"SWC1"); type=C1LS; break; +#ifdef PCSX + case 0x3B: strcpy(insn[i],"HLECALL"); type=HLECALL; break; +#endif case 0x3C: strcpy(insn[i],"SCD"); type=NI; break; case 0x3D: strcpy(insn[i],"SDC1"); type=C1LS; break; case 0x3F: strcpy(insn[i],"SD"); type=STORE; break; default: strcpy(insn[i],"???"); type=NI; - assem_debug("NI %08x @%08x\n", source[i], addr + i*4); + printf("NI %08x @%08x\n", source[i], addr + i*4); break; } itype[i]=type; @@ -8063,6 +8103,7 @@ int new_recompile_block(int addr) rt2[i]=0; break; case SYSCALL: + case HLECALL: rs1[i]=CCREG; rs2[i]=0; rt1[i]=0; @@ -8106,6 +8147,7 @@ int new_recompile_block(int addr) if(i>MAXBLOCK/2) done=1; } if(i>0&&itype[i-1]==SYSCALL&&stop_after_jal) done=1; + if(itype[i-1]==HLECALL) done=1; assert(i0&&(itype[i-1]==RJUMP||itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP||itype[i]==SYSCALL)) + if(i>0&&(itype[i-1]==RJUMP||itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP||itype[i]==SYSCALL||itype[i]==HLECALL)) { cc=0; } @@ -9168,7 +9211,7 @@ int new_recompile_block(int addr) } } } - else if(itype[i]==SYSCALL) + else if(itype[i]==SYSCALL||itype[i]==HLECALL) { // SYSCALL instruction (software interrupt) nr=0; @@ -9930,7 +9973,7 @@ int new_recompile_block(int addr) if((regs[i].was32>>dep2[i+1])&1) r32|=1LL<