From a5cd72d0e598f037fd9d9f23948af5b2fb06e2eb Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 21 Jul 2023 23:33:10 +0300 Subject: [PATCH] drc: update according to interpreter much larger commit than I'd like but it's too much work to split it --- include/compiler_features.h | 16 +- libpcsxcore/new_dynarec/assem_arm.c | 156 +++- libpcsxcore/new_dynarec/assem_arm64.c | 128 ++- libpcsxcore/new_dynarec/linkage_arm.S | 37 +- libpcsxcore/new_dynarec/linkage_arm64.S | 37 +- libpcsxcore/new_dynarec/new_dynarec.c | 870 ++++++++---------- libpcsxcore/new_dynarec/patches/trace_drc_chk | 67 +- libpcsxcore/new_dynarec/patches/trace_intr | 212 +++-- libpcsxcore/psxinterpreter.c | 16 +- 9 files changed, 793 insertions(+), 746 deletions(-) diff --git a/include/compiler_features.h b/include/compiler_features.h index 38418664..753706d7 100644 --- a/include/compiler_features.h +++ b/include/compiler_features.h @@ -2,14 +2,28 @@ #ifdef __GNUC__ # define likely(x) __builtin_expect((x),1) # define unlikely(x) __builtin_expect((x),0) -# define noinline __attribute__((noinline)) +# ifdef __clang__ +# define noinline __attribute__((noinline)) +# else +# define noinline __attribute__((noinline,noclone)) +# endif +# define unused __attribute__((unused)) #else # define likely(x) (x) # define unlikely(x) (x) # define noinline +# define unused #endif #ifndef __has_builtin #define __has_builtin(x) 0 #endif +#if __has_builtin(__builtin_add_overflow) || (defined(__GNUC__) && __GNUC__ >= 5) +#define add_overflow(a, b, r) __builtin_add_overflow(a, b, &(r)) +#define sub_overflow(a, b, r) __builtin_sub_overflow(a, b, &(r)) +#else +#define add_overflow(a, b, r) ({r = (u32)a + (u32)b; (a ^ ~b) & (a ^ r) & (1u<<31);}) +#define sub_overflow(a, b, r) ({r = (u32)a - (u32)b; (a ^ b) & (a ^ r) & (1u<<31);}) +#endif + diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 2847e516..88b2ff36 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -351,12 +351,24 @@ static void emit_neg(int rs, int rt) output_w32(0xe2600000|rd_rn_rm(rt,rs,0)); } +static void emit_negs(int rs, int rt) +{ + assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]); + output_w32(0xe2700000|rd_rn_rm(rt,rs,0)); +} + static void emit_sub(int rs1,int rs2,int rt) { assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2)); } +static void emit_subs(int rs1,int rs2,int rt) +{ + assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2)); +} + static void emit_zeroreg(int rt) { assem_debug("mov %s,#0\n",regname[rt]); @@ -489,6 +501,12 @@ static void emit_not(int rs,int rt) output_w32(0xe1e00000|rd_rn_rm(rt,0,rs)); } +static void emit_mvneq(int rs,int rt) +{ + assem_debug("mvneq %s,%s\n",regname[rt],regname[rs]); + output_w32(0x01e00000|rd_rn_rm(rt,0,rs)); +} + static void emit_and(u_int rs1,u_int rs2,u_int rt) { assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); @@ -577,29 +595,37 @@ static void emit_addimm_ptr(u_int rs, uintptr_t imm, u_int rt) emit_addimm(rs, imm, rt); } -static void emit_addimm_and_set_flags(int imm,int rt) +static void emit_addimm_and_set_flags3(u_int rs, int imm, u_int rt) { assert(imm>-65536&&imm<65536); u_int armval; - if(genimm(imm,&armval)) { - assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm); - output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval); - }else if(genimm(-imm,&armval)) { - assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm); - output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval); - }else if(imm<0) { - assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00); + if (genimm(imm, &armval)) { + assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0xe2900000|rd_rn_rm(rt,rs,0)|armval); + } else if (genimm(-imm, &armval)) { + assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0xe2500000|rd_rn_rm(rt,rs,0)|armval); + } else if (rs != rt) { + emit_movimm(imm, rt); + emit_adds(rs, rt, rt); + } else if (imm < 0) { + assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00); assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF); - output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8)); + output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8)); output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0)); - }else{ - assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00); + } else { + assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00); assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF); - output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8)); + output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8)); output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0)); } } +static void emit_addimm_and_set_flags(int imm, u_int rt) +{ + emit_addimm_and_set_flags3(rt, imm, rt); +} + static void emit_addnop(u_int r) { assert(r<16); @@ -1012,6 +1038,14 @@ static void emit_jge(const void *a_) output_w32(0xaa000000|offset); } +static void emit_jo(const void *a_) +{ + int a = (int)a_; + assem_debug("bvs %x\n",a); + u_int offset=genjmp(a); + output_w32(0x6a000000|offset); +} + static void emit_jno(const void *a_) { int a = (int)a_; @@ -1218,7 +1252,7 @@ static void emit_readword(void *addr, int rt) { uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local; assert(offset<4096); - assem_debug("ldr %s,fp+%d\n",regname[rt],offset); + assem_debug("ldr %s,fp+%#x%s\n", regname[rt], offset, fpofs_name(offset)); output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset); } #define emit_readptr emit_readword @@ -1278,7 +1312,7 @@ static void emit_writeword(int rt, void *addr) { uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local; assert(offset<4096); - assem_debug("str %s,fp+%d\n",regname[rt],offset); + assem_debug("str %s,fp+%#x%s\n", regname[rt], offset, fpofs_name(offset)); output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset); } @@ -1631,7 +1665,7 @@ static void do_readstub(int n) u_int reglist=stubs[n].e; const signed char *i_regmap=i_regs->regmap; int rt; - if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) { + if(dops[i].itype==C2LS||dops[i].itype==LOADLR) { rt=get_reg(i_regmap,FTEMP); }else{ rt=get_reg(i_regmap,dops[i].rt1); @@ -1658,7 +1692,7 @@ static void do_readstub(int n) emit_shrimm(rs,12,temp2); emit_readword_dualindexedx4(temp,temp2,temp2); emit_lsls_imm(temp2,1,temp2); - if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) { + if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) { switch(type) { case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break; case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break; @@ -1691,7 +1725,7 @@ static void do_readstub(int n) emit_loadreg(CCREG,2); emit_addimm(cc<0?2:cc,(int)stubs[n].d,2); emit_far_call(handler); - if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) { + if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) { mov_loadtype_adj(type,0,rt); } if(restore_jump) @@ -1787,7 +1821,7 @@ static void do_writestub(int n) u_int reglist=stubs[n].e; const signed char *i_regmap=i_regs->regmap; int rt,r; - if(dops[i].itype==C1LS||dops[i].itype==C2LS) { + if(dops[i].itype==C2LS) { rt=get_reg(i_regmap,r=FTEMP); }else{ rt=get_reg(i_regmap,r=dops[i].rs2); @@ -2095,15 +2129,11 @@ static void multdiv_assemble_arm(int i, const struct regstat *i_regs) // case 0x19: MULTU // case 0x1A: DIV // case 0x1B: DIVU - // case 0x1C: DMULT - // case 0x1D: DMULTU - // case 0x1E: DDIV - // case 0x1F: DDIVU if(dops[i].rs1&&dops[i].rs2) { - if((dops[i].opcode2&4)==0) // 32-bit + switch (dops[i].opcode2) { - if(dops[i].opcode2==0x18) // MULT + case 0x18: // MULT { signed char m1=get_reg(i_regs->regmap,dops[i].rs1); signed char m2=get_reg(i_regs->regmap,dops[i].rs2); @@ -2115,7 +2145,8 @@ static void multdiv_assemble_arm(int i, const struct regstat *i_regs) assert(lo>=0); emit_smull(m1,m2,hi,lo); } - if(dops[i].opcode2==0x19) // MULTU + break; + case 0x19: // MULTU { signed char m1=get_reg(i_regs->regmap,dops[i].rs1); signed char m2=get_reg(i_regs->regmap,dops[i].rs2); @@ -2127,14 +2158,16 @@ static void multdiv_assemble_arm(int i, const struct regstat *i_regs) assert(lo>=0); emit_umull(m1,m2,hi,lo); } - if(dops[i].opcode2==0x1A) // DIV + break; + case 0x1A: // DIV { signed char d1=get_reg(i_regs->regmap,dops[i].rs1); signed char d2=get_reg(i_regs->regmap,dops[i].rs2); - assert(d1>=0); - assert(d2>=0); signed char quotient=get_reg(i_regs->regmap,LOREG); signed char remainder=get_reg(i_regs->regmap,HIREG); + void *jaddr_div0; + assert(d1>=0); + assert(d2>=0); assert(quotient>=0); assert(remainder>=0); emit_movs(d1,remainder); @@ -2142,11 +2175,12 @@ static void multdiv_assemble_arm(int i, const struct regstat *i_regs) emit_negmi(quotient,quotient); // .. quotient and .. emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump) emit_movs(d2,HOST_TEMPREG); - emit_jeq(out+52); // Division by zero + jaddr_div0 = out; + emit_jeq(0); // Division by zero emit_negsmi(HOST_TEMPREG,HOST_TEMPREG); #ifdef HAVE_ARMV5 emit_clz(HOST_TEMPREG,quotient); - emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG); + emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG); // shifted divisor #else emit_movimm(0,quotient); emit_addpl_imm(quotient,1,quotient); @@ -2162,23 +2196,27 @@ static void multdiv_assemble_arm(int i, const struct regstat *i_regs) emit_jcc(out-16); // -4 emit_teq(d1,d2); emit_negmi(quotient,quotient); + set_jump_target(jaddr_div0, out); emit_test(d1,d1); emit_negmi(remainder,remainder); } - if(dops[i].opcode2==0x1B) // DIVU + break; + case 0x1B: // DIVU { signed char d1=get_reg(i_regs->regmap,dops[i].rs1); // dividend signed char d2=get_reg(i_regs->regmap,dops[i].rs2); // divisor - assert(d1>=0); - assert(d2>=0); signed char quotient=get_reg(i_regs->regmap,LOREG); signed char remainder=get_reg(i_regs->regmap,HIREG); + void *jaddr_div0; + assert(d1>=0); + assert(d2>=0); assert(quotient>=0); assert(remainder>=0); emit_mov(d1,remainder); emit_movimm(0xffffffff,quotient); // div0 case emit_test(d2,d2); - emit_jeq(out+40); // Division by zero + jaddr_div0 = out; + emit_jeq(0); // Division by zero #ifdef HAVE_ARMV5 emit_clz(d2,HOST_TEMPREG); emit_movimm(1<<31,quotient); @@ -2196,20 +2234,54 @@ static void multdiv_assemble_arm(int i, const struct regstat *i_regs) emit_adcs(quotient,quotient,quotient); emit_shrcc_imm(d2,1,d2); emit_jcc(out-16); // -4 + set_jump_target(jaddr_div0, out); } + break; } - else // 64-bit - assert(0); } else { - // Multiply by zero is zero. - // MIPS does not have a divide by zero exception. - // The result is undefined, we return zero. signed char hr=get_reg(i_regs->regmap,HIREG); signed char lr=get_reg(i_regs->regmap,LOREG); - if(hr>=0) emit_zeroreg(hr); - if(lr>=0) emit_zeroreg(lr); + if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs2==0) // div 0 + { + if (dops[i].rs1) { + signed char numerator = get_reg(i_regs->regmap, dops[i].rs1); + assert(numerator >= 0); + if (hr < 0) + hr = HOST_TEMPREG; + emit_movs(numerator, hr); + if (lr >= 0) { + if (dops[i].opcode2 == 0x1A) { // DIV + emit_movimm(0xffffffff, lr); + emit_negmi(lr, lr); + } + else + emit_movimm(~0, lr); + } + } + else { + if (hr >= 0) emit_zeroreg(hr); + if (lr >= 0) emit_movimm(~0,lr); + } + } + else if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs1==0) + { + signed char denominator = get_reg(i_regs->regmap, dops[i].rs2); + assert(denominator >= 0); + if (hr >= 0) emit_zeroreg(hr); + if (lr >= 0) { + emit_zeroreg(lr); + emit_test(denominator, denominator); + emit_mvneq(lr, lr); + } + } + else + { + // Multiply by zero is zero. + if (hr >= 0) emit_zeroreg(hr); + if (lr >= 0) emit_zeroreg(lr); + } } } #define multdiv_assemble multdiv_assemble_arm diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index 6f9c91d9..d35ad451 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -302,6 +302,12 @@ static void emit_add(u_int rs1, u_int rs2, u_int rt) output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt)); } +static void emit_adds(u_int rs1, u_int rs2, u_int rt) +{ + assem_debug("adds %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]); + output_w32(0x2b000000 | rm_rn_rd(rs2, rs1, rt)); +} + static void emit_add64(u_int rs1, u_int rs2, u_int rt) { assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]); @@ -315,19 +321,37 @@ static void emit_adds64(u_int rs1, u_int rs2, u_int rt) } #define emit_adds_ptr emit_adds64 +static void emit_add_lsrimm(u_int rs1, u_int rs2, u_int shift, u_int rt) +{ + assem_debug("add %s,%s,%s,lsr #%u\n",regname[rt],regname[rs1],regname[rs2],shift); + output_w32(0x0b400000 | rm_imm6_rn_rd(rs2, shift, rs1, rt)); +} + static void emit_neg(u_int rs, u_int rt) { assem_debug("neg %s,%s\n",regname[rt],regname[rs]); output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt)); } +static void emit_negs(u_int rs, u_int rt) +{ + assem_debug("negs %s,%s\n",regname[rt],regname[rs]); + output_w32(0x6b000000 | rm_rn_rd(rs, WZR, rt)); +} + static void emit_sub(u_int rs1, u_int rs2, u_int rt) { assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]); output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt)); } -static void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt) +static void emit_subs(u_int rs1, u_int rs2, u_int rt) +{ + assem_debug("subs %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]); + output_w32(0x6b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt)); +} + +static unused void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt) { assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift); output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt)); @@ -422,7 +446,7 @@ static void emit_readword(void *addr, u_int rt) { uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local; if (!(offset & 3) && offset <= 16380) { - assem_debug("ldr %s,[x%d+%#lx]\n", regname[rt], FP, offset); + assem_debug("ldr %s,[x%d+%#lx]%s\n", regname[rt], FP, offset, fpofs_name(offset)); output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt)); } else @@ -433,7 +457,7 @@ static void emit_readdword(void *addr, u_int rt) { uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local; if (!(offset & 7) && offset <= 32760) { - assem_debug("ldr %s,[x%d+%#lx]\n", regname64[rt], FP, offset); + assem_debug("ldr %s,[x%d+%#lx]%s\n", regname64[rt], FP, offset, fpofs_name(offset)); output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt)); } else @@ -482,7 +506,7 @@ static void emit_writeword(u_int rt, void *addr) { uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local; if (!(offset & 3) && offset <= 16380) { - assem_debug("str %s,[x%d+%#lx]\n", regname[rt], FP, offset); + assem_debug("str %s,[x%d+%#lx]%s\n", regname[rt], FP, offset, fpofs_name(offset)); output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt)); } else @@ -493,7 +517,7 @@ static void emit_writedword(u_int rt, void *addr) { uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local; if (!(offset & 7) && offset <= 32760) { - assem_debug("str %s,[x%d+%#lx]\n", regname64[rt], FP, offset); + assem_debug("str %s,[x%d+%#lx]%s\n", regname64[rt], FP, offset, fpofs_name(offset)); output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt)); } else @@ -564,6 +588,12 @@ static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt) output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt)); } +static void emit_orn_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt) +{ + assem_debug("orn %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift); + output_w32(0x2aa00000 | rm_imm6_rn_rd(rs2, shift, rs1, rt)); +} + static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt) { assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm); @@ -595,24 +625,35 @@ static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm); output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt)); } - else if (imm < 16777216) { - assem_debug("add %s,%s,#%#lx\n",regname[rt],regname[rt],imm&0xfff000); - output_w32(0x11400000 | is64 | imm12_rn_rd(imm >> 12, rs, rt)); - if ((imm & 0xfff) || s) { - assem_debug("add%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],imm&0xfff); - output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm & 0xfff, rt, rt)); + else if (imm < 16777216 && (!(imm & 0xfff) || !s)) { + assem_debug("add%s %s,%s,#%#lx\n", st, regname[rt], regname[rs], imm&0xfff000); + output_w32(0x11400000 | is64 | s | imm12_rn_rd(imm >> 12, rs, rt)); + if (imm & 0xfff) { + assem_debug("add %s,%s,#%#lx\n", regname[rt], regname[rt], imm&0xfff); + output_w32(0x11000000 | is64 | imm12_rn_rd(imm & 0xfff, rt, rt)); } } - else if (-imm < 16777216) { - assem_debug("sub %s,%s,#%#lx\n",regname[rt],regname[rt],-imm&0xfff000); - output_w32(0x51400000 | is64 | imm12_rn_rd(-imm >> 12, rs, rt)); - if ((imm & 0xfff) || s) { - assem_debug("sub%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],-imm&0xfff); - output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm & 0xfff, rt, rt)); + else if (-imm < 16777216 && (!(-imm & 0xfff) || !s)) { + assem_debug("sub%s %s,%s,#%#lx\n", st, regname[rt], regname[rs], -imm&0xfff000); + output_w32(0x51400000 | is64 | s | imm12_rn_rd(-imm >> 12, rs, rt)); + if (-imm & 0xfff) { + assem_debug("sub %s,%s,#%#lx\n", regname[rt], regname[rt], -imm&0xfff); + output_w32(0x51000000 | is64 | imm12_rn_rd(-imm & 0xfff, rt, rt)); } } - else - abort(); + else { + u_int tmp = rt; + assert(!is64); + if (rs == rt) { + host_tempreg_acquire(); + tmp = HOST_TEMPREG; + } + emit_movimm(imm, tmp); + assem_debug("add%s %s,%s,%s\n", st, regname[rt], regname[rs], regname[tmp]); + output_w32(0x0b000000 | s | rm_rn_rd(rs, tmp, rt)); + if (tmp == HOST_TEMPREG) + host_tempreg_release(); + } } static void emit_addimm(u_int rs, uintptr_t imm, u_int rt) @@ -639,6 +680,11 @@ static void emit_addimm_and_set_flags(int imm, u_int rt) emit_addimm_s(1, 0, rt, imm, rt); } +static void emit_addimm_and_set_flags3(u_int rs, int imm, u_int rt) +{ + emit_addimm_s(1, 0, rs, imm, rt); +} + static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt) { const char *names[] = { "and", "orr", "eor", "ands" }; @@ -844,6 +890,12 @@ static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt) output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt)); } +static void emit_csinvne_reg(u_int rs1,u_int rs2,u_int rt) +{ + assem_debug("csinv %s,%s,%s,ne\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x5a800000 | (COND_NE << 12) | rm_rn_rd(rs2, rs1, rt)); +} + static void emit_slti32(u_int rs,int imm,u_int rt) { if(rs!=rt) emit_zeroreg(rt); @@ -972,6 +1024,13 @@ static void emit_jge(const void *a) output_w32(0x54000000 | (offset << 5) | COND_GE); } +static void emit_jo(const void *a) +{ + assem_debug("bvs %p\n", a); + u_int offset = genjmpcc(a); + output_w32(0x54000000 | (offset << 5) | COND_VS); +} + static void emit_jno(const void *a) { assem_debug("bvc %p\n", a); @@ -1405,7 +1464,7 @@ static void do_readstub(int n) u_int reglist = stubs[n].e; const signed char *i_regmap = i_regs->regmap; int rt; - if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) { + if(dops[i].itype==C2LS||dops[i].itype==LOADLR) { rt=get_reg(i_regmap,FTEMP); }else{ rt=get_reg(i_regmap,dops[i].rt1); @@ -1435,7 +1494,7 @@ static void do_readstub(int n) emit_adds64(temp2,temp2,temp2); handler_jump=out; emit_jc(0); - if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) { + if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) { switch(type) { case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break; case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break; @@ -1470,7 +1529,7 @@ static void do_readstub(int n) emit_addimm(cc<0?2:cc,(int)stubs[n].d,2); emit_far_call(handler); // (no cycle reload after read) - if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) { + if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) { loadstore_extend(type,0,rt); } if(restore_jump) @@ -1561,7 +1620,7 @@ static void do_writestub(int n) u_int reglist=stubs[n].e; signed char *i_regmap=i_regs->regmap; int rt,r; - if(dops[i].itype==C1LS||dops[i].itype==C2LS) { + if(dops[i].itype==C2LS) { rt=get_reg(i_regmap,r=FTEMP); }else{ rt=get_reg(i_regmap,r=dops[i].rs2); @@ -1827,8 +1886,10 @@ static void multdiv_assemble_arm64(int i, const struct regstat *i_regs) // div 0 quotient (remainder is already correct) host_tempreg_acquire(); - if (dops[i].opcode2 == 0x1A) // DIV - emit_sub_asrimm(0,numerator,31,HOST_TEMPREG); + if (dops[i].opcode2 == 0x1A) { // DIV + emit_add_lsrimm(WZR,numerator,31,HOST_TEMPREG); + emit_orn_asrimm(HOST_TEMPREG,numerator,31,HOST_TEMPREG); + } else emit_movimm(~0,HOST_TEMPREG); emit_test(denominator,denominator); @@ -1852,8 +1913,10 @@ static void multdiv_assemble_arm64(int i, const struct regstat *i_regs) if (hr >= 0) emit_mov(numerator,hr); if (lr >= 0) { - if (dops[i].opcode2 == 0x1A) // DIV - emit_sub_asrimm(0,numerator,31,lr); + if (dops[i].opcode2 == 0x1A) { // DIV + emit_add_lsrimm(WZR,numerator,31,lr); + emit_orn_asrimm(lr,numerator,31,lr); + } else emit_movimm(~0,lr); } @@ -1863,6 +1926,17 @@ static void multdiv_assemble_arm64(int i, const struct regstat *i_regs) if (lr >= 0) emit_movimm(~0,lr); } } + else if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs1==0) + { + signed char denominator = get_reg(i_regs->regmap, dops[i].rs2); + assert(denominator >= 0); + if (hr >= 0) emit_zeroreg(hr); + if (lr >= 0) { + emit_zeroreg(lr); + emit_test(denominator, denominator); + emit_csinvne_reg(lr, lr, lr); + } + } else { // Multiply by zero is zero. diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index 7a6d2edd..f859817a 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -261,41 +261,28 @@ FUNCTION(cc_interrupt): .size cc_interrupt, .-cc_interrupt .align 2 -FUNCTION(fp_exception): - mov r2, #0x10000000 -.E7: - ldr r1, [fp, #LO_reg_cop0+48] /* Status */ - mov r3, #0x80000000 - str r0, [fp, #LO_reg_cop0+56] /* EPC */ - orr r1, #2 - add r2, r2, #0x2c - str r1, [fp, #LO_reg_cop0+48] /* Status */ - str r2, [fp, #LO_reg_cop0+52] /* Cause */ - add r0, r3, #0x80 - bl ndrc_get_addr_ht - mov pc, r0 - .size fp_exception, .-fp_exception - .align 2 -FUNCTION(fp_exception_ds): - mov r2, #0x90000000 /* Set high bit if delay slot */ - b .E7 - .size fp_exception_ds, .-fp_exception_ds - - .align 2 +FUNCTION(jump_overflow_ds): + mov r0, #(12<<2) /* R3000E_Ov */ + mov r1, #1 + b call_psxException +FUNCTION(jump_overflow): + mov r0, #(12<<2) + mov r1, #0 + b call_psxException FUNCTION(jump_break_ds): - mov r0, #0x24 + mov r0, #(9<<2) /* R3000E_Bp */ mov r1, #1 b call_psxException FUNCTION(jump_break): - mov r0, #0x24 + mov r0, #(9<<2) mov r1, #0 b call_psxException FUNCTION(jump_syscall_ds): - mov r0, #0x20 + mov r0, #(8<<2) /* R3000E_Syscall */ mov r1, #2 b call_psxException FUNCTION(jump_syscall): - mov r0, #0x20 + mov r0, #(8<<2) mov r1, #0 call_psxException: diff --git a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S index bc5f1151..38c78dc3 100644 --- a/libpcsxcore/new_dynarec/linkage_arm64.S +++ b/libpcsxcore/new_dynarec/linkage_arm64.S @@ -119,41 +119,28 @@ FUNCTION(cc_interrupt): .size cc_interrupt, .-cc_interrupt .align 2 -FUNCTION(fp_exception): - mov w2, #0x10000000 -0: - ldr w1, [rFP, #LO_reg_cop0+48] /* Status */ - mov w3, #0x80000000 - str w0, [rFP, #LO_reg_cop0+56] /* EPC */ - orr w1, w1, #2 - add w2, w2, #0x2c - str w1, [rFP, #LO_reg_cop0+48] /* Status */ - str w2, [rFP, #LO_reg_cop0+52] /* Cause */ - add w0, w3, #0x80 - bl ndrc_get_addr_ht - br x0 - .size fp_exception, .-fp_exception - .align 2 -FUNCTION(fp_exception_ds): - mov w2, #0x90000000 /* Set high bit if delay slot */ - b 0b - .size fp_exception_ds, .-fp_exception_ds - - .align 2 +FUNCTION(jump_overflow_ds): + mov w0, #(12<<2) /* R3000E_Ov */ + mov w1, #1 + b call_psxException +FUNCTION(jump_overflow): + mov w0, #(12<<2) + mov w1, #0 + b call_psxException FUNCTION(jump_break_ds): - mov w0, #0x24 + mov w0, #(9<<2) /* R3000E_Bp */ mov w1, #1 b call_psxException FUNCTION(jump_break): - mov w0, #0x24 + mov w0, #(9<<2) mov w1, #0 b call_psxException FUNCTION(jump_syscall_ds): - mov w0, #0x20 + mov w0, #(8<<2) /* R3000E_Syscall */ mov w1, #2 b call_psxException FUNCTION(jump_syscall): - mov w0, #0x20 + mov w0, #(8<<2) mov w1, #0 call_psxException: diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index f5976462..067decb7 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -39,14 +39,10 @@ static Jit g_jit; #include "../psxinterpreter.h" #include "../gte.h" #include "emu_if.h" // emulator interface +#include "linkage_offsets.h" +#include "compiler_features.h" #include "arm_features.h" -#define unused __attribute__((unused)) -#ifdef __clang__ -#define noinline __attribute__((noinline)) -#else -#define noinline __attribute__((noinline,noclone)) -#endif #ifndef ARRAY_SIZE #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) #endif @@ -59,6 +55,7 @@ static Jit g_jit; //#define DISASM //#define ASSEM_PRINT +//#define REGMAP_PRINT // with DISASM only //#define INV_DEBUG_W //#define STAT_PRINT @@ -135,19 +132,20 @@ static long ndrc_write_ofs; // stubs enum stub_type { CC_STUB = 1, - FP_STUB = 2, + //FP_STUB = 2, LOADB_STUB = 3, LOADH_STUB = 4, LOADW_STUB = 5, - LOADD_STUB = 6, + //LOADD_STUB = 6, LOADBU_STUB = 7, LOADHU_STUB = 8, STOREB_STUB = 9, STOREH_STUB = 10, STOREW_STUB = 11, - STORED_STUB = 12, + //STORED_STUB = 12, STORELR_STUB = 13, INVCODE_STUB = 14, + OVERFLOW_STUB = 15, }; // regmap_pre[i] - regs before [i] insn starts; dirty things here that @@ -163,7 +161,7 @@ struct regstat uint64_t dirty; uint64_t u; u_int wasconst; // before; for example 'lw r2, (r2)' wasconst is true - u_int isconst; // ... but isconst is false when r2 is known + u_int isconst; // ... but isconst is false when r2 is known (hr) u_int loadedconst; // host regs that have constants loaded //u_int waswritten; // MIPS regs that were used as store base before }; @@ -225,8 +223,8 @@ struct jump_info static struct decoded_insn { u_char itype; - u_char opcode; - u_char opcode2; + u_char opcode; // bits 31-26 + u_char opcode2; // (depends on opcode) u_char rs1; u_char rs2; u_char rt1; @@ -239,6 +237,9 @@ static struct decoded_insn u_char is_ujump:1; u_char is_load:1; u_char is_store:1; + u_char is_delay_load:1; // is_load + MFC/CFC + u_char is_exception:1; // unconditional, also interp. fallback + u_char may_except:1; // might generate an exception } dops[MAXBLOCK]; static u_char *out; @@ -350,7 +351,7 @@ static struct decoded_insn #define STORE 2 // Store #define LOADLR 3 // Unaligned load #define STORELR 4 // Unaligned store -#define MOV 5 // Move +#define MOV 5 // Move (hi/lo only) #define ALU 6 // Arithmetic/logic #define MULTDIV 7 // Multiply/divide #define SHIFT 8 // Shift by register @@ -361,16 +362,9 @@ static struct decoded_insn #define CJUMP 13 // Conditional branch (BEQ/BNE/BGTZ/BLEZ) #define SJUMP 14 // Conditional branch (regimm format) #define COP0 15 // Coprocessor 0 -#define COP1 16 // Coprocessor 1 -#define C1LS 17 // Coprocessor 1 load/store -//#define FJUMP 18 // Conditional branch (floating point) -//#define FLOAT 19 // Floating point unit -//#define FCONV 20 // Convert integer to float -//#define FCOMP 21 // Floating point compare (sets FSREG) +#define RFE 16 #define SYSCALL 22// SYSCALL,BREAK -#define OTHER 23 // Other -//#define SPAN 24 // Branch/delay slot spans 2 pages -#define NI 25 // Not implemented +#define OTHER 23 // Other/unknown - do nothing #define HLECALL 26// PCSX fake opcodes for HLE #define COP2 27 // Coprocessor 2 move #define C2LS 28 // Coprocessor 2 load/store @@ -388,12 +382,12 @@ static struct decoded_insn // asm linkage void dyna_linker(); void cc_interrupt(); -void fp_exception(); -void fp_exception_ds(); void jump_syscall (u_int u0, u_int u1, u_int pc); void jump_syscall_ds(u_int u0, u_int u1, u_int pc); void jump_break (u_int u0, u_int u1, u_int pc); void jump_break_ds(u_int u0, u_int u1, u_int pc); +void jump_overflow (u_int u0, u_int u1, u_int pc); +void jump_overflow_ds(u_int u0, u_int u1, u_int pc); void jump_to_new_pc(); void call_gteStall(); void new_dyna_leave(); @@ -406,6 +400,7 @@ static void ndrc_write_invalidate_many(u_int addr, u_int end); static int new_recompile_block(u_int addr); static void invalidate_block(struct block_info *block); +static void exception_assemble(int i, const struct regstat *i_regs, int ccadj_); // Needed by assembler static void wb_register(signed char r, const signed char regmap[], uint64_t dirty); @@ -784,10 +779,10 @@ static void noinline *get_addr(u_int vaddr, int can_compile) return ndrc_get_addr_ht(vaddr); // generate an address error - psxRegs.CP0.n.SR |= 2; - psxRegs.CP0.n.Cause = (vaddr<<31) | (4<<2); - psxRegs.CP0.n.EPC = (vaddr&1) ? vaddr-5 : vaddr; - psxRegs.CP0.n.BadVAddr = vaddr & ~1; + psxRegs.CP0.n.Cause &= 0x300; + psxRegs.CP0.n.Cause |= R3000E_AdEL << 2; + psxRegs.CP0.n.EPC = vaddr; + psxRegs.pc = 0x80000080; return ndrc_get_addr_ht(0x80000080); } @@ -834,6 +829,12 @@ static signed char get_reg(const signed char regmap[], signed char r) #endif +// get reg suitable for writing +static signed char get_reg_w(const signed char regmap[], signed char r) +{ + return r == 0 ? -1 : get_reg(regmap, r); +} + // get reg as mask bit (1 << hr) static u_int get_regm(const signed char regmap[], signed char r) { @@ -1069,7 +1070,7 @@ static int needed_again(int r, int i) j++; break; } - if(dops[i+j].itype==SYSCALL||dops[i+j].itype==HLECALL||dops[i+j].itype==INTCALL||((source[i+j]&0xfc00003f)==0x0d)) + if (dops[i+j].is_exception) { break; } @@ -1197,6 +1198,8 @@ static const struct { FUNCNAME(jump_break_ds), FUNCNAME(jump_syscall), FUNCNAME(jump_syscall_ds), + FUNCNAME(jump_overflow), + FUNCNAME(jump_overflow_ds), FUNCNAME(call_gteStall), FUNCNAME(new_dyna_leave), FUNCNAME(pcsx_mtc0), @@ -1214,8 +1217,43 @@ static const char *func_name(const void *a) return function_names[i].name; return ""; } + +static const char *fpofs_name(u_int ofs) +{ + u_int *p = (u_int *)&dynarec_local + ofs/sizeof(u_int); + static char buf[64]; + switch (ofs) { + #define ofscase(x) case LO_##x: return " ; " #x + ofscase(next_interupt); + ofscase(last_count); + ofscase(pending_exception); + ofscase(stop); + ofscase(address); + ofscase(lo); + ofscase(hi); + ofscase(PC); + ofscase(cycle); + ofscase(mem_rtab); + ofscase(mem_wtab); + ofscase(psxH_ptr); + ofscase(invc_ptr); + ofscase(ram_offset); + #undef ofscase + } + buf[0] = 0; + if (psxRegs.GPR.r <= p && p < &psxRegs.GPR.r[32]) + snprintf(buf, sizeof(buf), " ; r%d", (int)(p - psxRegs.GPR.r)); + else if (psxRegs.CP0.r <= p && p < &psxRegs.CP0.r[32]) + snprintf(buf, sizeof(buf), " ; cp0 $%d", (int)(p - psxRegs.CP0.r)); + else if (psxRegs.CP2D.r <= p && p < &psxRegs.CP2D.r[32]) + snprintf(buf, sizeof(buf), " ; cp2d $%d", (int)(p - psxRegs.CP2D.r)); + else if (psxRegs.CP2C.r <= p && p < &psxRegs.CP2C.r[32]) + snprintf(buf, sizeof(buf), " ; cp2c $%d", (int)(p - psxRegs.CP2C.r)); + return buf; +} #else #define func_name(x) "" +#define fpofs_name(x) "" #endif #ifdef __i386__ @@ -1933,6 +1971,10 @@ static void alu_alloc(struct regstat *current,int i) } alloc_reg(current,i,dops[i].rt1); } + if (!(dops[i].opcode2 & 1)) { + alloc_cc(current,i); // for exceptions + dirty_reg(current,CCREG); + } } if(dops[i].opcode2==0x2a||dops[i].opcode2==0x2b) { // SLT/SLTU if(dops[i].rt1) { @@ -1955,9 +1997,6 @@ static void alu_alloc(struct regstat *current,int i) alloc_reg(current,i,dops[i].rt1); } } - if(dops[i].opcode2>=0x2c&&dops[i].opcode2<=0x2f) { // DADD/DADDU/DSUB/DSUBU - assert(0); - } clear_const(current,dops[i].rs1); clear_const(current,dops[i].rs2); clear_const(current,dops[i].rt1); @@ -1969,10 +2008,7 @@ static void imm16_alloc(struct regstat *current,int i) if(dops[i].rs1&&needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1); else dops[i].use_lt1=!!dops[i].rs1; if(dops[i].rt1) alloc_reg(current,i,dops[i].rt1); - if(dops[i].opcode==0x18||dops[i].opcode==0x19) { // DADDI/DADDIU - assert(0); - } - else if(dops[i].opcode==0x0a||dops[i].opcode==0x0b) { // SLTI/SLTIU + if(dops[i].opcode==0x0a||dops[i].opcode==0x0b) { // SLTI/SLTIU clear_const(current,dops[i].rs1); clear_const(current,dops[i].rt1); } @@ -1991,6 +2027,14 @@ static void imm16_alloc(struct regstat *current,int i) set_const(current,dops[i].rt1,v+imm[i]); } else clear_const(current,dops[i].rt1); + if (dops[i].opcode == 0x08) { + alloc_cc(current,i); // for exceptions + dirty_reg(current,CCREG); + if (dops[i].rt1 == 0) { + alloc_reg_temp(current,i,-1); + minimum_free_regs[i]=1; + } + } } else { set_const(current,dops[i].rt1,imm[i]<<16); // LUI @@ -2009,15 +2053,7 @@ static void load_alloc(struct regstat *current,int i) alloc_reg(current, i, ROREG); if(dops[i].rt1&&!((current->u>>dops[i].rt1)&1)) { alloc_reg(current,i,dops[i].rt1); - assert(get_reg(current->regmap,dops[i].rt1)>=0); - if(dops[i].opcode==0x27||dops[i].opcode==0x37) // LWU/LD - { - assert(0); - } - else if(dops[i].opcode==0x1A||dops[i].opcode==0x1B) // LDL/LDR - { - assert(0); - } + assert(get_reg_w(current->regmap, dops[i].rt1)>=0); dirty_reg(current,dops[i].rt1); // LWL/LWR need a temporary register for the old value if(dops[i].opcode==0x22||dops[i].opcode==0x26) @@ -2037,10 +2073,6 @@ static void load_alloc(struct regstat *current,int i) } alloc_reg_temp(current,i,-1); minimum_free_regs[i]=1; - if(dops[i].opcode==0x1A||dops[i].opcode==0x1B) // LDL/LDR - { - assert(0); - } } } @@ -2067,12 +2099,6 @@ static void store_alloc(struct regstat *current,int i) minimum_free_regs[i]=1; } -static void c1ls_alloc(struct regstat *current,int i) -{ - clear_const(current,dops[i].rt1); - alloc_reg(current,i,CSREG); // Status -} - static void c2ls_alloc(struct regstat *current,int i) { clear_const(current,dops[i].rt1); @@ -2141,7 +2167,6 @@ static void cop0_alloc(struct regstat *current,int i) { if(dops[i].rt1) { clear_const(current,dops[i].rt1); - alloc_all(current,i); alloc_reg(current,i,dops[i].rt1); dirty_reg(current,dops[i].rt1); } @@ -2158,14 +2183,14 @@ static void cop0_alloc(struct regstat *current,int i) current->u&=~1LL; alloc_reg(current,i,0); } + minimum_free_regs[i] = HOST_REGS; } - else - { - // RFE - assert(dops[i].opcode2==0x10); - alloc_all(current,i); - } - minimum_free_regs[i]=HOST_REGS; +} + +static void rfe_alloc(struct regstat *current, int i) +{ + alloc_all(current, i); + minimum_free_regs[i] = HOST_REGS; } static void cop2_alloc(struct regstat *current,int i) @@ -2249,14 +2274,12 @@ static void delayslot_alloc(struct regstat *current,int i) case COP0: cop0_alloc(current,i); break; - case COP1: + case RFE: + rfe_alloc(current,i); break; case COP2: cop2_alloc(current,i); break; - case C1LS: - c1ls_alloc(current,i); - break; case C2LS: c2ls_alloc(current,i); break; @@ -2333,47 +2356,77 @@ static void pass_args(int a0, int a1) } } -static void alu_assemble(int i, const struct regstat *i_regs) +static void alu_assemble(int i, const struct regstat *i_regs, int ccadj_) { if(dops[i].opcode2>=0x20&&dops[i].opcode2<=0x23) { // ADD/ADDU/SUB/SUBU - if(dops[i].rt1) { - signed char s1,s2,t; - t=get_reg(i_regs->regmap,dops[i].rt1); - if(t>=0) { - s1=get_reg(i_regs->regmap,dops[i].rs1); - s2=get_reg(i_regs->regmap,dops[i].rs2); - if(dops[i].rs1&&dops[i].rs2) { + int do_oflow = dops[i].may_except; // ADD/SUB with exceptions enabled + if (dops[i].rt1 || do_oflow) { + int do_exception_check = 0; + signed char s1, s2, t, tmp; + t = get_reg_w(i_regs->regmap, dops[i].rt1); + tmp = get_reg_temp(i_regs->regmap); + if (t < 0 && do_oflow) + t = tmp; + if (t >= 0) { + s1 = get_reg(i_regs->regmap, dops[i].rs1); + s2 = get_reg(i_regs->regmap, dops[i].rs2); + if (dops[i].rs1 && dops[i].rs2) { assert(s1>=0); assert(s2>=0); - if(dops[i].opcode2&2) emit_sub(s1,s2,t); - else emit_add(s1,s2,t); + if (dops[i].opcode2 & 2) { + if (do_oflow) { + emit_subs(s1, s2, tmp); + do_exception_check = 1; + } + else + emit_sub(s1,s2,t); + } + else { + if (do_oflow) { + emit_adds(s1, s2, tmp); + do_exception_check = 1; + } + else + emit_add(s1,s2,t); + } } else if(dops[i].rs1) { if(s1>=0) emit_mov(s1,t); else emit_loadreg(dops[i].rs1,t); } else if(dops[i].rs2) { - if(s2>=0) { - if(dops[i].opcode2&2) emit_neg(s2,t); - else emit_mov(s2,t); + if (s2 < 0) { + emit_loadreg(dops[i].rs2, t); + s2 = t; } - else { - emit_loadreg(dops[i].rs2,t); - if(dops[i].opcode2&2) emit_neg(t,t); + if (dops[i].opcode2 & 2) { + if (do_oflow) { + emit_negs(s2, tmp); + do_exception_check = 1; + } + else + emit_neg(s2, t); } + else if (s2 != t) + emit_mov(s2, t); } - else emit_zeroreg(t); + else + emit_zeroreg(t); + } + if (do_exception_check) { + void *jaddr = out; + emit_jo(0); + if (t >= 0 && tmp != t) + emit_mov(tmp, t); + add_stub_r(OVERFLOW_STUB, jaddr, out, i, 0, i_regs, ccadj_, 0); } } } - if(dops[i].opcode2>=0x2c&&dops[i].opcode2<=0x2f) { // DADD/DADDU/DSUB/DSUBU - assert(0); - } - if(dops[i].opcode2==0x2a||dops[i].opcode2==0x2b) { // SLT/SLTU + else if(dops[i].opcode2==0x2a||dops[i].opcode2==0x2b) { // SLT/SLTU if(dops[i].rt1) { signed char s1l,s2l,t; { - t=get_reg(i_regs->regmap,dops[i].rt1); + t=get_reg_w(i_regs->regmap, dops[i].rt1); //assert(t>=0); if(t>=0) { s1l=get_reg(i_regs->regmap,dops[i].rs1); @@ -2406,10 +2459,10 @@ static void alu_assemble(int i, const struct regstat *i_regs) } } } - if(dops[i].opcode2>=0x24&&dops[i].opcode2<=0x27) { // AND/OR/XOR/NOR + else if(dops[i].opcode2>=0x24&&dops[i].opcode2<=0x27) { // AND/OR/XOR/NOR if(dops[i].rt1) { signed char s1l,s2l,tl; - tl=get_reg(i_regs->regmap,dops[i].rt1); + tl=get_reg_w(i_regs->regmap, dops[i].rt1); { if(tl>=0) { s1l=get_reg(i_regs->regmap,dops[i].rs1); @@ -2473,12 +2526,12 @@ static void alu_assemble(int i, const struct regstat *i_regs) } } -static void imm16_assemble(int i, const struct regstat *i_regs) +static void imm16_assemble(int i, const struct regstat *i_regs, int ccadj_) { if (dops[i].opcode==0x0f) { // LUI if(dops[i].rt1) { signed char t; - t=get_reg(i_regs->regmap,dops[i].rt1); + t=get_reg_w(i_regs->regmap, dops[i].rt1); //assert(t>=0); if(t>=0) { if(!((i_regs->isconst>>t)&1)) @@ -2487,23 +2540,55 @@ static void imm16_assemble(int i, const struct regstat *i_regs) } } if(dops[i].opcode==0x08||dops[i].opcode==0x09) { // ADDI/ADDIU - if(dops[i].rt1) { - signed char s,t; - t=get_reg(i_regs->regmap,dops[i].rt1); + int is_addi = (dops[i].opcode == 0x08); + if (dops[i].rt1 || is_addi) { + signed char s, t, tmp; + t=get_reg_w(i_regs->regmap, dops[i].rt1); s=get_reg(i_regs->regmap,dops[i].rs1); if(dops[i].rs1) { - //assert(t>=0); - //assert(s>=0); + tmp = get_reg_temp(i_regs->regmap); + if (is_addi) { + assert(tmp >= 0); + if (t < 0) t = tmp; + } if(t>=0) { if(!((i_regs->isconst>>t)&1)) { - if(s<0) { + int sum, do_exception_check = 0; + if (s < 0) { if(i_regs->regmap_entry[t]!=dops[i].rs1) emit_loadreg(dops[i].rs1,t); - emit_addimm(t,imm[i],t); - }else{ - if(!((i_regs->wasconst>>s)&1)) - emit_addimm(s,imm[i],t); + if (is_addi) { + emit_addimm_and_set_flags3(t, imm[i], tmp); + do_exception_check = 1; + } else - emit_movimm(constmap[i][s]+imm[i],t); + emit_addimm(t, imm[i], t); + } else { + if (!((i_regs->wasconst >> s) & 1)) { + if (is_addi) { + emit_addimm_and_set_flags3(s, imm[i], tmp); + do_exception_check = 1; + } + else + emit_addimm(s, imm[i], t); + } + else { + int oflow = add_overflow(constmap[i][s], imm[i], sum); + if (is_addi && oflow) + do_exception_check = 2; + else + emit_movimm(sum, t); + } + } + if (do_exception_check) { + void *jaddr = out; + if (do_exception_check == 2) + emit_jmp(0); + else { + emit_jo(0); + if (tmp != t) + emit_mov(tmp, t); + } + add_stub_r(OVERFLOW_STUB, jaddr, out, i, 0, i_regs, ccadj_, 0); } } } @@ -2515,26 +2600,11 @@ static void imm16_assemble(int i, const struct regstat *i_regs) } } } - if(dops[i].opcode==0x18||dops[i].opcode==0x19) { // DADDI/DADDIU - if(dops[i].rt1) { - signed char sl,tl; - tl=get_reg(i_regs->regmap,dops[i].rt1); - sl=get_reg(i_regs->regmap,dops[i].rs1); - if(tl>=0) { - if(dops[i].rs1) { - assert(sl>=0); - emit_addimm(sl,imm[i],tl); - } else { - emit_movimm(imm[i],tl); - } - } - } - } else if(dops[i].opcode==0x0a||dops[i].opcode==0x0b) { // SLTI/SLTIU if(dops[i].rt1) { //assert(dops[i].rs1!=0); // r0 might be valid, but it's probably a bug signed char sl,t; - t=get_reg(i_regs->regmap,dops[i].rt1); + t=get_reg_w(i_regs->regmap, dops[i].rt1); sl=get_reg(i_regs->regmap,dops[i].rs1); //assert(t>=0); if(t>=0) { @@ -2573,7 +2643,7 @@ static void imm16_assemble(int i, const struct regstat *i_regs) else if(dops[i].opcode>=0x0c&&dops[i].opcode<=0x0e) { // ANDI/ORI/XORI if(dops[i].rt1) { signed char sl,tl; - tl=get_reg(i_regs->regmap,dops[i].rt1); + tl=get_reg_w(i_regs->regmap, dops[i].rt1); sl=get_reg(i_regs->regmap,dops[i].rs1); if(tl>=0 && !((i_regs->isconst>>tl)&1)) { if(dops[i].opcode==0x0c) //ANDI @@ -2634,7 +2704,7 @@ static void shiftimm_assemble(int i, const struct regstat *i_regs) { if(dops[i].rt1) { signed char s,t; - t=get_reg(i_regs->regmap,dops[i].rt1); + t=get_reg_w(i_regs->regmap, dops[i].rt1); s=get_reg(i_regs->regmap,dops[i].rs1); //assert(t>=0); if(t>=0&&!((i_regs->isconst>>t)&1)){ @@ -2940,7 +3010,7 @@ static void load_assemble(int i, const struct regstat *i_regs, int ccadj_) int offset_reg = -1; int fastio_reg_override = -1; u_int reglist=get_host_reglist(i_regs->regmap); - tl=get_reg(i_regs->regmap,dops[i].rt1); + tl=get_reg_w(i_regs->regmap, dops[i].rt1); s=get_reg(i_regs->regmap,dops[i].rs1); offset=imm[i]; if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<>16)==0x1f80)) - ||dops[i].rt1==0) { + if(tl<0 && ((!c||(((u_int)constmap[i][s]+offset)>>16)==0x1f80) || dops[i].rt1==0)) { // could be FIFO, must perform the read // ||dummy read assem_debug("(forced read)\n"); @@ -2982,7 +3050,7 @@ static void load_assemble(int i, const struct regstat *i_regs, int ccadj_) else if (ram_offset && memtarget) { offset_reg = get_ro_reg(i_regs, 0); } - int dummy=(dops[i].rt1==0)||(tl!=get_reg(i_regs->regmap,dops[i].rt1)); // ignore loads to r0 and unneeded reg + int dummy=(dops[i].rt1==0)||(tl!=get_reg_w(i_regs->regmap, dops[i].rt1)); // ignore loads to r0 and unneeded reg switch (dops[i].opcode) { case 0x20: // LB if(!c||memtarget) { @@ -3077,7 +3145,7 @@ static void load_assemble(int i, const struct regstat *i_regs, int ccadj_) default: assert(0); } - } + } // tl >= 0 if (fastio_reg_override == HOST_TEMPREG || offset_reg == HOST_TEMPREG) host_tempreg_release(); } @@ -3092,7 +3160,7 @@ static void loadlr_assemble(int i, const struct regstat *i_regs, int ccadj_) int offset_reg = -1; int fastio_reg_override = -1; u_int reglist=get_host_reglist(i_regs->regmap); - tl=get_reg(i_regs->regmap,dops[i].rt1); + tl=get_reg_w(i_regs->regmap, dops[i].rt1); s=get_reg(i_regs->regmap,dops[i].rs1); temp=get_reg_temp(i_regs->regmap); temp2=get_reg(i_regs->regmap,FTEMP); @@ -3483,9 +3551,8 @@ static void cop0_assemble(int i, const struct regstat *i_regs, int ccadj_) { if(dops[i].opcode2==0) // MFC0 { - signed char t=get_reg(i_regs->regmap,dops[i].rt1); + signed char t=get_reg_w(i_regs->regmap, dops[i].rt1); u_int copr=(source[i]>>11)&0x1f; - //assert(t>=0); // Why does this happen? OOT is weird if(t>=0&&dops[i].rt1!=0) { emit_readword(®_cop0[copr],t); } @@ -3554,61 +3621,15 @@ static void cop0_assemble(int i, const struct regstat *i_regs, int ccadj_) } emit_loadreg(dops[i].rs1,s); } - else - { - assert(dops[i].opcode2==0x10); - //if((source[i]&0x3f)==0x10) // RFE - { - emit_readword(&psxRegs.CP0.n.SR,0); - emit_andimm(0,0x3c,1); - emit_andimm(0,~0xf,0); - emit_orrshr_imm(1,2,0); - emit_writeword(0,&psxRegs.CP0.n.SR); - } - } -} - -static void cop1_unusable(int i, const struct regstat *i_regs) -{ - // XXX: should just just do the exception instead - //if(!cop1_usable) - { - void *jaddr=out; - emit_jmp(0); - add_stub_r(FP_STUB,jaddr,out,i,0,i_regs,is_delayslot,0); - } } -static void cop1_assemble(int i, const struct regstat *i_regs) +static void rfe_assemble(int i, const struct regstat *i_regs, int ccadj_) { - cop1_unusable(i, i_regs); -} - -static void c1ls_assemble(int i, const struct regstat *i_regs) -{ - cop1_unusable(i, i_regs); -} - -// FP_STUB -static void do_cop1stub(int n) -{ - literal_pool(256); - assem_debug("do_cop1stub %x\n",start+stubs[n].a*4); - set_jump_target(stubs[n].addr, out); - int i=stubs[n].a; -// int rs=stubs[n].b; - struct regstat *i_regs=(struct regstat *)stubs[n].c; - int ds=stubs[n].d; - if(!ds) { - load_all_consts(regs[i].regmap_entry,regs[i].wasdirty,i); - //if(i_regs!=®s[i]) printf("oops: regs[i]=%x i_regs=%x",(int)®s[i],(int)i_regs); - } - //else {printf("fp exception in delay slot\n");} - wb_dirtys(i_regs->regmap_entry,i_regs->wasdirty); - if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); - emit_movimm(start+(i-ds)*4,0); // Get PC - emit_addimm(HOST_CCREG,ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle... - emit_far_jump(ds?fp_exception_ds:fp_exception); + emit_readword(&psxRegs.CP0.n.SR, 0); + emit_andimm(0, 0x3c, 1); + emit_andimm(0, ~0xf, 0); + emit_orrshr_imm(1, 2, 0); + emit_writeword(0, &psxRegs.CP0.n.SR); } static int cop2_is_stalling_op(int i, int *cycles) @@ -4018,7 +4039,7 @@ static void cop2_assemble(int i, const struct regstat *i_regs) cop2_do_stall_check(0, i, i_regs, reglist); } if (dops[i].opcode2==0) { // MFC2 - signed char tl=get_reg(i_regs->regmap,dops[i].rt1); + signed char tl=get_reg_w(i_regs->regmap, dops[i].rt1); if(tl>=0&&dops[i].rt1!=0) cop2_get_dreg(copr,tl,temp); } @@ -4028,7 +4049,7 @@ static void cop2_assemble(int i, const struct regstat *i_regs) } else if (dops[i].opcode2==2) // CFC2 { - signed char tl=get_reg(i_regs->regmap,dops[i].rt1); + signed char tl=get_reg_w(i_regs->regmap, dops[i].rt1); if(tl>=0&&dops[i].rt1!=0) emit_readword(®_cop2c[copr],tl); } @@ -4092,6 +4113,18 @@ static void do_unalignedwritestub(int n) emit_jmp(stubs[n].retaddr); // return address } +static void do_overflowstub(int n) +{ + assem_debug("do_overflowstub %x\n", start + (u_int)stubs[n].a * 4); + literal_pool(24); + int i = stubs[n].a; + struct regstat *i_regs = (struct regstat *)stubs[n].c; + int ccadj = stubs[n].d; + set_jump_target(stubs[n].addr, out); + wb_dirtys(regs[i].regmap, regs[i].dirty); + exception_assemble(i, i_regs, ccadj); +} + #ifndef multdiv_assemble void multdiv_assemble(int i,struct regstat *i_regs) { @@ -4106,7 +4139,7 @@ static void mov_assemble(int i, const struct regstat *i_regs) //if(dops[i].opcode2==0x11||dops[i].opcode2==0x13) { // MTHI/MTLO if(dops[i].rt1) { signed char sl,tl; - tl=get_reg(i_regs->regmap,dops[i].rt1); + tl=get_reg_w(i_regs->regmap, dops[i].rt1); //assert(tl>=0); if(tl>=0) { sl=get_reg(i_regs->regmap,dops[i].rs1); @@ -4137,13 +4170,17 @@ static void call_c_cpu_handler(int i, const struct regstat *i_regs, int ccadj_, emit_far_jump(jump_to_new_pc); } -static void syscall_assemble(int i, const struct regstat *i_regs, int ccadj_) +static void exception_assemble(int i, const struct regstat *i_regs, int ccadj_) { // 'break' tends to be littered around to catch things like // division by 0 and is almost never executed, so don't emit much code here - void *func = (dops[i].opcode2 == 0x0C) - ? (is_delayslot ? jump_syscall_ds : jump_syscall) - : (is_delayslot ? jump_break_ds : jump_break); + void *func; + if (dops[i].itype == ALU || dops[i].itype == IMM16) + func = is_delayslot ? jump_overflow_ds : jump_overflow; + else if (dops[i].opcode2 == 0x0C) + func = is_delayslot ? jump_syscall_ds : jump_syscall; + else + func = is_delayslot ? jump_break_ds : jump_break; assert(get_reg(i_regs->regmap, CCREG) == HOST_CCREG); emit_movimm(start + i*4, 2); // pc emit_addimm(HOST_CCREG, ccadj_ + CLOCK_ADJUST(1), HOST_CCREG); @@ -4152,7 +4189,7 @@ static void syscall_assemble(int i, const struct regstat *i_regs, int ccadj_) static void hlecall_bad() { - SysPrintf("bad hlecall\n"); + assert(0); } static void hlecall_assemble(int i, const struct regstat *i_regs, int ccadj_) @@ -4214,7 +4251,7 @@ static void speculate_register_values(int i) // fallthrough case IMM16: if(dops[i].rt1&&is_const(®s[i],dops[i].rt1)) { - int value,hr=get_reg(regs[i].regmap,dops[i].rt1); + int value,hr=get_reg_w(regs[i].regmap, dops[i].rt1); if(hr>=0) { if(get_final_value(hr,i,&value)) smrv[dops[i].rt1]=value; @@ -4272,10 +4309,10 @@ static int assemble(int i, const struct regstat *i_regs, int ccadj_) int ds = 0; switch (dops[i].itype) { case ALU: - alu_assemble(i, i_regs); + alu_assemble(i, i_regs, ccadj_); break; case IMM16: - imm16_assemble(i, i_regs); + imm16_assemble(i, i_regs, ccadj_); break; case SHIFT: shift_assemble(i, i_regs); @@ -4298,11 +4335,8 @@ static int assemble(int i, const struct regstat *i_regs, int ccadj_) case COP0: cop0_assemble(i, i_regs, ccadj_); break; - case COP1: - cop1_assemble(i, i_regs); - break; - case C1LS: - c1ls_assemble(i, i_regs); + case RFE: + rfe_assemble(i, i_regs, ccadj_); break; case COP2: cop2_assemble(i, i_regs); @@ -4321,7 +4355,7 @@ static int assemble(int i, const struct regstat *i_regs, int ccadj_) mov_assemble(i, i_regs); break; case SYSCALL: - syscall_assemble(i, i_regs, ccadj_); + exception_assemble(i, i_regs, ccadj_); break; case HLECALL: hlecall_assemble(i, i_regs, ccadj_); @@ -4347,7 +4381,6 @@ static int assemble(int i, const struct regstat *i_regs, int ccadj_) break; case NOP: case OTHER: - case NI: // not handled, just skip break; default: @@ -4460,7 +4493,7 @@ static void address_generation(int i, const struct regstat *i_regs, signed char int ra=-1; int agr=AGEN1+(i&1); if(dops[i].itype==LOAD) { - ra=get_reg(i_regs->regmap,dops[i].rt1); + ra=get_reg_w(i_regs->regmap, dops[i].rt1); if(ra<0) ra=get_reg_temp(i_regs->regmap); assert(ra>=0); } @@ -5364,7 +5397,7 @@ static void rjump_assemble_write_ra(int i) int rt,return_address; assert(dops[i+1].rt1!=dops[i].rt1); assert(dops[i+1].rt2!=dops[i].rt1); - rt=get_reg(branch_regs[i].regmap,dops[i].rt1); + rt=get_reg_w(branch_regs[i].regmap, dops[i].rt1); assem_debug("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); assert(rt>=0); return_address=start+i*4+8; @@ -5457,7 +5490,7 @@ static void rjump_assemble(int i, const struct regstat *i_regs) //assert(adj==0); emit_addimm_and_set_flags(ccadj[i] + CLOCK_ADJUST(2), HOST_CCREG); add_stub(CC_STUB,out,NULL,0,i,-1,TAKEN,rs); - if(dops[i+1].itype==COP0 && dops[i+1].opcode2==0x10) + if (dops[i+1].itype == RFE) // special case for RFE emit_jmp(0); else @@ -6131,13 +6164,6 @@ void disassemble_inst(int i) printf (" %x: %s r%d,cpr0[%d]\n",start+i*4,insn[i],dops[i].rs1,(source[i]>>11)&0x1f); // MTC0 else printf (" %x: %s\n",start+i*4,insn[i]); break; - case COP1: - if(dops[i].opcode2<3) - printf (" %x: %s r%d,cpr1[%d]\n",start+i*4,insn[i],dops[i].rt1,(source[i]>>11)&0x1f); // MFC1 - else if(dops[i].opcode2>3) - printf (" %x: %s r%d,cpr1[%d]\n",start+i*4,insn[i],dops[i].rs1,(source[i]>>11)&0x1f); // MTC1 - else printf (" %x: %s\n",start+i*4,insn[i]); - break; case COP2: if(dops[i].opcode2<3) printf (" %x: %s r%d,cpr2[%d]\n",start+i*4,insn[i],dops[i].rt1,(source[i]>>11)&0x1f); // MFC2 @@ -6145,9 +6171,6 @@ void disassemble_inst(int i) printf (" %x: %s r%d,cpr2[%d]\n",start+i*4,insn[i],dops[i].rs1,(source[i]>>11)&0x1f); // MTC2 else printf (" %x: %s\n",start+i*4,insn[i]); break; - case C1LS: - printf (" %x: %s cpr1[%d],r%d+%x\n",start+i*4,insn[i],(source[i]>>16)&0x1f,dops[i].rs1,imm[i]); - break; case C2LS: printf (" %x: %s cpr2[%d],r%d+%x\n",start+i*4,insn[i],(source[i]>>16)&0x1f,dops[i].rs1,imm[i]); break; @@ -6158,9 +6181,12 @@ void disassemble_inst(int i) //printf (" %s %8x\n",insn[i],source[i]); printf (" %x: %s\n",start+i*4,insn[i]); } + #ifndef REGMAP_PRINT return; - printf("D: %"PRIu64" WD: %"PRIu64" U: %"PRIu64"\n", - regs[i].dirty, regs[i].wasdirty, unneeded_reg[i]); + #endif + printf("D: %"PRIx64" WD: %"PRIx64" U: %"PRIx64" hC: %x hWC: %x hLC: %x\n", + regs[i].dirty, regs[i].wasdirty, unneeded_reg[i], + regs[i].isconst, regs[i].wasconst, regs[i].loadedconst); print_regmap("pre: ", regmap_pre[i]); print_regmap("entry: ", regs[i].regmap_entry); print_regmap("map: ", regs[i].regmap); @@ -6551,17 +6577,20 @@ static int apply_hacks(void) static noinline void pass1_disassemble(u_int pagelimit) { int i, j, done = 0, ni_count = 0; - unsigned int type,op,op2; + unsigned int type,op,op2,op3; for (i = 0; !done; i++) { + int force_prev_to_interpreter = 0; memset(&dops[i], 0, sizeof(dops[i])); - op2=0; - minimum_free_regs[i]=0; - dops[i].opcode=op=source[i]>>26; + op2 = 0; + minimum_free_regs[i] = 0; + dops[i].opcode = op = source[i] >> 26; + type = INTCALL; + set_mnemonic(i, "???"); switch(op) { - case 0x00: set_mnemonic(i, "special"); type=NI; + case 0x00: set_mnemonic(i, "special"); op2=source[i]&0x3f; switch(op2) { @@ -6594,51 +6623,20 @@ static noinline void pass1_disassemble(u_int pagelimit) case 0x27: set_mnemonic(i, "NOR"); type=ALU; break; case 0x2A: set_mnemonic(i, "SLT"); type=ALU; break; case 0x2B: set_mnemonic(i, "SLTU"); type=ALU; break; - case 0x30: set_mnemonic(i, "TGE"); type=NI; break; - case 0x31: set_mnemonic(i, "TGEU"); type=NI; break; - case 0x32: set_mnemonic(i, "TLT"); type=NI; break; - case 0x33: set_mnemonic(i, "TLTU"); type=NI; break; - case 0x34: set_mnemonic(i, "TEQ"); type=NI; break; - case 0x36: set_mnemonic(i, "TNE"); type=NI; break; -#if 0 - case 0x14: set_mnemonic(i, "DSLLV"); type=SHIFT; break; - case 0x16: set_mnemonic(i, "DSRLV"); type=SHIFT; break; - case 0x17: set_mnemonic(i, "DSRAV"); type=SHIFT; break; - case 0x1C: set_mnemonic(i, "DMULT"); type=MULTDIV; break; - case 0x1D: set_mnemonic(i, "DMULTU"); type=MULTDIV; break; - case 0x1E: set_mnemonic(i, "DDIV"); type=MULTDIV; break; - case 0x1F: set_mnemonic(i, "DDIVU"); type=MULTDIV; break; - case 0x2C: set_mnemonic(i, "DADD"); type=ALU; break; - case 0x2D: set_mnemonic(i, "DADDU"); type=ALU; break; - case 0x2E: set_mnemonic(i, "DSUB"); type=ALU; break; - case 0x2F: set_mnemonic(i, "DSUBU"); type=ALU; break; - case 0x38: set_mnemonic(i, "DSLL"); type=SHIFTIMM; break; - case 0x3A: set_mnemonic(i, "DSRL"); type=SHIFTIMM; break; - case 0x3B: set_mnemonic(i, "DSRA"); type=SHIFTIMM; break; - case 0x3C: set_mnemonic(i, "DSLL32"); type=SHIFTIMM; break; - case 0x3E: set_mnemonic(i, "DSRL32"); type=SHIFTIMM; break; - case 0x3F: set_mnemonic(i, "DSRA32"); type=SHIFTIMM; break; -#endif } break; - case 0x01: set_mnemonic(i, "regimm"); type=NI; - op2=(source[i]>>16)&0x1f; + case 0x01: set_mnemonic(i, "regimm"); + type = SJUMP; + op2 = (source[i] >> 16) & 0x1f; switch(op2) { - case 0x00: set_mnemonic(i, "BLTZ"); type=SJUMP; break; - case 0x01: set_mnemonic(i, "BGEZ"); type=SJUMP; break; - //case 0x02: set_mnemonic(i, "BLTZL"); type=SJUMP; break; - //case 0x03: set_mnemonic(i, "BGEZL"); type=SJUMP; break; - //case 0x08: set_mnemonic(i, "TGEI"); type=NI; break; - //case 0x09: set_mnemonic(i, "TGEIU"); type=NI; break; - //case 0x0A: set_mnemonic(i, "TLTI"); type=NI; break; - //case 0x0B: set_mnemonic(i, "TLTIU"); type=NI; break; - //case 0x0C: set_mnemonic(i, "TEQI"); type=NI; break; - //case 0x0E: set_mnemonic(i, "TNEI"); type=NI; break; - case 0x10: set_mnemonic(i, "BLTZAL"); type=SJUMP; break; - case 0x11: set_mnemonic(i, "BGEZAL"); type=SJUMP; break; - //case 0x12: set_mnemonic(i, "BLTZALL"); type=SJUMP; break; - //case 0x13: set_mnemonic(i, "BGEZALL"); type=SJUMP; break; + case 0x10: set_mnemonic(i, "BLTZAL"); break; + case 0x11: set_mnemonic(i, "BGEZAL"); break; + default: + if (op2 & 1) + set_mnemonic(i, "BGEZ"); + else + set_mnemonic(i, "BLTZ"); } break; case 0x02: set_mnemonic(i, "J"); type=UJUMP; break; @@ -6655,68 +6653,40 @@ static noinline void pass1_disassemble(u_int pagelimit) case 0x0D: set_mnemonic(i, "ORI"); type=IMM16; break; case 0x0E: set_mnemonic(i, "XORI"); type=IMM16; break; case 0x0F: set_mnemonic(i, "LUI"); type=IMM16; break; - case 0x10: set_mnemonic(i, "cop0"); type=NI; - op2=(source[i]>>21)&0x1f; + case 0x10: set_mnemonic(i, "COP0"); + op2 = (source[i]>>21) & 0x1f; + if (op2 & 0x10) { + op3 = source[i] & 0x1f; + switch (op3) + { + case 0x01: case 0x02: case 0x06: case 0x08: type = INTCALL; break; + case 0x10: set_mnemonic(i, "RFE"); type=RFE; break; + default: type = OTHER; break; + } + break; + } switch(op2) { - case 0x00: set_mnemonic(i, "MFC0"); type=COP0; break; - case 0x02: set_mnemonic(i, "CFC0"); type=COP0; break; + u32 rd; + case 0x00: + set_mnemonic(i, "MFC0"); + rd = (source[i] >> 11) & 0x1F; + if (!(0x00000417u & (1u << rd))) + type = COP0; + break; case 0x04: set_mnemonic(i, "MTC0"); type=COP0; break; - case 0x06: set_mnemonic(i, "CTC0"); type=COP0; break; - case 0x10: set_mnemonic(i, "RFE"); type=COP0; break; + case 0x02: + case 0x06: type = INTCALL; break; + default: type = OTHER; break; } break; - case 0x11: set_mnemonic(i, "cop1"); type=COP1; + case 0x11: set_mnemonic(i, "COP1"); op2=(source[i]>>21)&0x1f; break; -#if 0 - case 0x14: set_mnemonic(i, "BEQL"); type=CJUMP; break; - case 0x15: set_mnemonic(i, "BNEL"); type=CJUMP; break; - case 0x16: set_mnemonic(i, "BLEZL"); type=CJUMP; break; - case 0x17: set_mnemonic(i, "BGTZL"); type=CJUMP; break; - case 0x18: set_mnemonic(i, "DADDI"); type=IMM16; break; - case 0x19: set_mnemonic(i, "DADDIU"); type=IMM16; break; - case 0x1A: set_mnemonic(i, "LDL"); type=LOADLR; break; - case 0x1B: set_mnemonic(i, "LDR"); type=LOADLR; break; -#endif - case 0x20: set_mnemonic(i, "LB"); type=LOAD; break; - case 0x21: set_mnemonic(i, "LH"); type=LOAD; break; - case 0x22: set_mnemonic(i, "LWL"); type=LOADLR; break; - case 0x23: set_mnemonic(i, "LW"); type=LOAD; break; - case 0x24: set_mnemonic(i, "LBU"); type=LOAD; break; - case 0x25: set_mnemonic(i, "LHU"); type=LOAD; break; - case 0x26: set_mnemonic(i, "LWR"); type=LOADLR; break; -#if 0 - case 0x27: set_mnemonic(i, "LWU"); type=LOAD; break; -#endif - case 0x28: set_mnemonic(i, "SB"); type=STORE; break; - case 0x29: set_mnemonic(i, "SH"); type=STORE; break; - case 0x2A: set_mnemonic(i, "SWL"); type=STORELR; break; - case 0x2B: set_mnemonic(i, "SW"); type=STORE; break; -#if 0 - case 0x2C: set_mnemonic(i, "SDL"); type=STORELR; break; - case 0x2D: set_mnemonic(i, "SDR"); type=STORELR; break; -#endif - case 0x2E: set_mnemonic(i, "SWR"); type=STORELR; break; - case 0x2F: set_mnemonic(i, "CACHE"); type=NOP; break; - case 0x30: set_mnemonic(i, "LL"); type=NI; break; - case 0x31: set_mnemonic(i, "LWC1"); type=C1LS; break; -#if 0 - case 0x34: set_mnemonic(i, "LLD"); type=NI; break; - case 0x35: set_mnemonic(i, "LDC1"); type=C1LS; break; - case 0x37: set_mnemonic(i, "LD"); type=LOAD; break; -#endif - case 0x38: set_mnemonic(i, "SC"); type=NI; break; - case 0x39: set_mnemonic(i, "SWC1"); type=C1LS; break; -#if 0 - case 0x3C: set_mnemonic(i, "SCD"); type=NI; break; - case 0x3D: set_mnemonic(i, "SDC1"); type=C1LS; break; - case 0x3F: set_mnemonic(i, "SD"); type=STORE; break; -#endif - case 0x12: set_mnemonic(i, "COP2"); type=NI; + case 0x12: set_mnemonic(i, "COP2"); op2=(source[i]>>21)&0x1f; - //if (op2 & 0x10) - if (source[i]&0x3f) { // use this hack to support old savestates with patched gte insns + if (op2 & 0x10) { + type = OTHER; if (gte_handlers[source[i]&0x3f]!=NULL) { #ifdef DISASM if (gte_regnames[source[i]&0x3f]!=NULL) @@ -6724,7 +6694,7 @@ static noinline void pass1_disassemble(u_int pagelimit) else snprintf(insn[i], sizeof(insn[i]), "COP2 %x", source[i]&0x3f); #endif - type=C2OP; + type = C2OP; } } else switch(op2) @@ -6735,32 +6705,53 @@ static noinline void pass1_disassemble(u_int pagelimit) case 0x06: set_mnemonic(i, "CTC2"); type=COP2; break; } break; + case 0x13: set_mnemonic(i, "COP3"); + op2=(source[i]>>21)&0x1f; + break; + case 0x20: set_mnemonic(i, "LB"); type=LOAD; break; + case 0x21: set_mnemonic(i, "LH"); type=LOAD; break; + case 0x22: set_mnemonic(i, "LWL"); type=LOADLR; break; + case 0x23: set_mnemonic(i, "LW"); type=LOAD; break; + case 0x24: set_mnemonic(i, "LBU"); type=LOAD; break; + case 0x25: set_mnemonic(i, "LHU"); type=LOAD; break; + case 0x26: set_mnemonic(i, "LWR"); type=LOADLR; break; + case 0x28: set_mnemonic(i, "SB"); type=STORE; break; + case 0x29: set_mnemonic(i, "SH"); type=STORE; break; + case 0x2A: set_mnemonic(i, "SWL"); type=STORELR; break; + case 0x2B: set_mnemonic(i, "SW"); type=STORE; break; + case 0x2E: set_mnemonic(i, "SWR"); type=STORELR; break; case 0x32: set_mnemonic(i, "LWC2"); type=C2LS; break; case 0x3A: set_mnemonic(i, "SWC2"); type=C2LS; break; - case 0x3B: set_mnemonic(i, "HLECALL"); type=HLECALL; break; - default: set_mnemonic(i, "???"); type=NI; - SysPrintf("NI %08x @%08x (%08x)\n", source[i], start + i*4, start); + case 0x3B: + if (Config.HLE && (source[i] & 0x03ffffff) < ARRAY_SIZE(psxHLEt)) { + set_mnemonic(i, "HLECALL"); + type = HLECALL; + } + break; + default: break; } + if (type == INTCALL) + SysPrintf("NI %08x @%08x (%08x)\n", source[i], start + i*4, start); dops[i].itype=type; dops[i].opcode2=op2; /* Get registers/immediates */ dops[i].use_lt1=0; gte_rs[i]=gte_rt[i]=0; + dops[i].rs1 = 0; + dops[i].rs2 = 0; + dops[i].rt1 = 0; + dops[i].rt2 = 0; switch(type) { case LOAD: dops[i].rs1=(source[i]>>21)&0x1f; - dops[i].rs2=0; dops[i].rt1=(source[i]>>16)&0x1f; - dops[i].rt2=0; imm[i]=(short)source[i]; break; case STORE: case STORELR: dops[i].rs1=(source[i]>>21)&0x1f; dops[i].rs2=(source[i]>>16)&0x1f; - dops[i].rt1=0; - dops[i].rt2=0; imm[i]=(short)source[i]; break; case LOADLR: @@ -6769,7 +6760,6 @@ static noinline void pass1_disassemble(u_int pagelimit) dops[i].rs1=(source[i]>>21)&0x1f; dops[i].rs2=(source[i]>>16)&0x1f; dops[i].rt1=(source[i]>>16)&0x1f; - dops[i].rt2=0; imm[i]=(short)source[i]; break; case IMM16: @@ -6777,7 +6767,6 @@ static noinline void pass1_disassemble(u_int pagelimit) else dops[i].rs1=(source[i]>>21)&0x1f; dops[i].rs2=0; dops[i].rt1=(source[i]>>16)&0x1f; - dops[i].rt2=0; if(op>=0x0c&&op<=0x0e) { // ANDI/ORI/XORI imm[i]=(unsigned short)source[i]; }else{ @@ -6785,10 +6774,6 @@ static noinline void pass1_disassemble(u_int pagelimit) } break; case UJUMP: - dops[i].rs1=0; - dops[i].rs2=0; - dops[i].rt1=0; - dops[i].rt2=0; // The JAL instruction writes to r31. if (op&1) { dops[i].rt1=31; @@ -6797,9 +6782,6 @@ static noinline void pass1_disassemble(u_int pagelimit) break; case RJUMP: dops[i].rs1=(source[i]>>21)&0x1f; - dops[i].rs2=0; - dops[i].rt1=0; - dops[i].rt2=0; // The JALR instruction writes to rd. if (op2&1) { dops[i].rt1=(source[i]>>11)&0x1f; @@ -6809,8 +6791,6 @@ static noinline void pass1_disassemble(u_int pagelimit) case CJUMP: dops[i].rs1=(source[i]>>21)&0x1f; dops[i].rs2=(source[i]>>16)&0x1f; - dops[i].rt1=0; - dops[i].rt2=0; if(op&2) { // BGTZ/BLEZ dops[i].rs2=0; } @@ -6818,10 +6798,8 @@ static noinline void pass1_disassemble(u_int pagelimit) case SJUMP: dops[i].rs1=(source[i]>>21)&0x1f; dops[i].rs2=CCREG; - dops[i].rt1=0; - dops[i].rt2=0; - if(op2&0x10) { // BxxAL - dops[i].rt1=31; + if (op2 == 0x10 || op2 == 0x11) { // BxxAL + dops[i].rt1 = 31; // NOTE: If the branch is not taken, r31 is still overwritten } break; @@ -6829,7 +6807,6 @@ static noinline void pass1_disassemble(u_int pagelimit) dops[i].rs1=(source[i]>>21)&0x1f; // source dops[i].rs2=(source[i]>>16)&0x1f; // subtract amount dops[i].rt1=(source[i]>>11)&0x1f; // destination - dops[i].rt2=0; break; case MULTDIV: dops[i].rs1=(source[i]>>21)&0x1f; // source @@ -6838,10 +6815,6 @@ static noinline void pass1_disassemble(u_int pagelimit) dops[i].rt2=LOREG; break; case MOV: - dops[i].rs1=0; - dops[i].rs2=0; - dops[i].rt1=0; - dops[i].rt2=0; if(op2==0x10) dops[i].rs1=HIREG; // MFHI if(op2==0x11) dops[i].rt1=HIREG; // MTHI if(op2==0x12) dops[i].rs1=LOREG; // MFLO @@ -6853,41 +6826,19 @@ static noinline void pass1_disassemble(u_int pagelimit) dops[i].rs1=(source[i]>>16)&0x1f; // target of shift dops[i].rs2=(source[i]>>21)&0x1f; // shift amount dops[i].rt1=(source[i]>>11)&0x1f; // destination - dops[i].rt2=0; break; case SHIFTIMM: dops[i].rs1=(source[i]>>16)&0x1f; dops[i].rs2=0; dops[i].rt1=(source[i]>>11)&0x1f; - dops[i].rt2=0; imm[i]=(source[i]>>6)&0x1f; - // DSxx32 instructions - if(op2>=0x3c) imm[i]|=0x20; break; case COP0: - dops[i].rs1=0; - dops[i].rs2=0; - dops[i].rt1=0; - dops[i].rt2=0; - if(op2==0||op2==2) dops[i].rt1=(source[i]>>16)&0x1F; // MFC0/CFC0 - if(op2==4||op2==6) dops[i].rs1=(source[i]>>16)&0x1F; // MTC0/CTC0 + if(op2==0) dops[i].rt1=(source[i]>>16)&0x1F; // MFC0 + if(op2==4) dops[i].rs1=(source[i]>>16)&0x1F; // MTC0 if(op2==4&&((source[i]>>11)&0x1f)==12) dops[i].rt2=CSREG; // Status - if(op2==16) if((source[i]&0x3f)==0x18) dops[i].rs2=CCREG; // ERET - break; - case COP1: - dops[i].rs1=0; - dops[i].rs2=0; - dops[i].rt1=0; - dops[i].rt2=0; - if(op2<3) dops[i].rt1=(source[i]>>16)&0x1F; // MFC1/DMFC1/CFC1 - if(op2>3) dops[i].rs1=(source[i]>>16)&0x1F; // MTC1/DMTC1/CTC1 - dops[i].rs2=CSREG; break; case COP2: - dops[i].rs1=0; - dops[i].rs2=0; - dops[i].rt1=0; - dops[i].rt2=0; if(op2<3) dops[i].rt1=(source[i]>>16)&0x1F; // MFC2/CFC2 if(op2>3) dops[i].rs1=(source[i]>>16)&0x1F; // MTC2/CTC2 dops[i].rs2=CSREG; @@ -6900,27 +6851,13 @@ static noinline void pass1_disassemble(u_int pagelimit) case 0x06: gte_rt[i]=1ll<<(gr+32); break; // CTC2 } break; - case C1LS: - dops[i].rs1=(source[i]>>21)&0x1F; - dops[i].rs2=CSREG; - dops[i].rt1=0; - dops[i].rt2=0; - imm[i]=(short)source[i]; - break; case C2LS: dops[i].rs1=(source[i]>>21)&0x1F; - dops[i].rs2=0; - dops[i].rt1=0; - dops[i].rt2=0; imm[i]=(short)source[i]; if(op==0x32) gte_rt[i]=1ll<<((source[i]>>16)&0x1F); // LWC2 else gte_rs[i]=1ll<<((source[i]>>16)&0x1F); // SWC2 break; case C2OP: - dops[i].rs1=0; - dops[i].rs2=0; - dops[i].rt1=0; - dops[i].rt2=0; gte_rs[i]=gte_reg_reads[source[i]&0x3f]; gte_rt[i]=gte_reg_writes[source[i]&0x3f]; gte_rt[i]|=1ll<<63; // every op changes flags @@ -6935,15 +6872,9 @@ static noinline void pass1_disassemble(u_int pagelimit) case HLECALL: case INTCALL: dops[i].rs1=CCREG; - dops[i].rs2=0; - dops[i].rt1=0; - dops[i].rt2=0; break; default: - dops[i].rs1=0; - dops[i].rs2=0; - dops[i].rt1=0; - dops[i].rt2=0; + break; } /* Calculate branch target addresses */ if(type==UJUMP) @@ -6970,41 +6901,48 @@ static noinline void pass1_disassemble(u_int pagelimit) dops[i].is_jump = (dops[i].itype == RJUMP || dops[i].itype == UJUMP || dops[i].itype == CJUMP || dops[i].itype == SJUMP); dops[i].is_ujump = (dops[i].itype == RJUMP || dops[i].itype == UJUMP); // || (source[i] >> 16) == 0x1000 // beq r0,r0 dops[i].is_load = (dops[i].itype == LOAD || dops[i].itype == LOADLR || op == 0x32); // LWC2 + dops[i].is_delay_load = (dops[i].is_load || (source[i] & 0xf3d00000) == 0x40000000); // MFC/CFC dops[i].is_store = (dops[i].itype == STORE || dops[i].itype == STORELR || op == 0x3a); // SWC2 + dops[i].is_exception = (dops[i].itype == SYSCALL || dops[i].itype == HLECALL || dops[i].itype == INTCALL); + dops[i].may_except = dops[i].is_exception || (dops[i].itype == ALU && (op2 == 0x20 || op2 == 0x22)) || op == 8; - /* messy cases to just pass over to the interpreter */ + /* rare messy cases to just pass over to the interpreter */ if (i > 0 && dops[i-1].is_jump) { - int do_in_intrp=0; // branch in delay slot? if (dops[i].is_jump) { // don't handle first branch and call interpreter if it's hit - SysPrintf("branch in delay slot @%08x (%08x)\n", start + i*4, start); - do_in_intrp=1; + SysPrintf("branch in DS @%08x (%08x)\n", start + i*4, start); + force_prev_to_interpreter = 1; } - // basic load delay detection - else if((type==LOAD||type==LOADLR||type==COP0||type==COP2||type==C2LS)&&dops[i].rt1!=0) { + // basic load delay detection through a branch + else if (dops[i].is_delay_load && dops[i].rt1 != 0) { int t=(ba[i-1]-start)/4; if(0 <= t && t < i &&(dops[i].rt1==dops[t].rs1||dops[i].rt1==dops[t].rs2)&&dops[t].itype!=CJUMP&&dops[t].itype!=SJUMP) { // jump target wants DS result - potential load delay effect - SysPrintf("load delay @%08x (%08x)\n", start + i*4, start); - do_in_intrp=1; + SysPrintf("load delay in DS @%08x (%08x)\n", start + i*4, start); + force_prev_to_interpreter = 1; dops[t+1].bt=1; // expected return from interpreter } else if(i>=2&&dops[i-2].rt1==2&&dops[i].rt1==2&&dops[i].rs1!=2&&dops[i].rs2!=2&&dops[i-1].rs1!=2&&dops[i-1].rs2!=2&& !(i>=3&&dops[i-3].is_jump)) { // v0 overwrite like this is a sign of trouble, bail out SysPrintf("v0 overwrite @%08x (%08x)\n", start + i*4, start); - do_in_intrp=1; + force_prev_to_interpreter = 1; } } - if (do_in_intrp) { - memset(&dops[i-1], 0, sizeof(dops[i-1])); - dops[i-1].itype = INTCALL; - dops[i-1].rs1 = CCREG; - ba[i-1] = -1; - done = 2; - i--; // don't compile the DS - } + } + else if (i > 0 && dops[i-1].is_delay_load && dops[i-1].rt1 != 0 + && (dops[i].rs1 == dops[i-1].rt1 || dops[i].rs2 == dops[i-1].rt1)) { + SysPrintf("load delay @%08x (%08x)\n", start + i*4, start); + force_prev_to_interpreter = 1; + } + if (force_prev_to_interpreter) { + memset(&dops[i-1], 0, sizeof(dops[i-1])); + dops[i-1].itype = INTCALL; + dops[i-1].rs1 = CCREG; + ba[i-1] = -1; + done = 2; + i--; // don't compile the DS/problematic load/etc } /* Is this the end of the block? */ @@ -7038,7 +6976,11 @@ static noinline void pass1_disassemble(u_int pagelimit) // Don't get too close to the limit if(i>MAXBLOCK/2) done=1; } - if (dops[i].itype == SYSCALL || dops[i].itype == HLECALL || dops[i].itype == INTCALL) + if (dops[i].itype == HLECALL) + stop = 1; + else if (dops[i].itype == INTCALL) + stop = 2; + else if (dops[i].is_exception) done = stop_after_jal ? 1 : 2; if (done == 2) { // Does the block continue due to a branch? @@ -7054,7 +6996,7 @@ static noinline void pass1_disassemble(u_int pagelimit) assert(start+i*4 8 || dops[i].opcode == 0x11)) { + if (dops[i].itype == INTCALL && (++ni_count > 8 || dops[i].opcode == 0x11)) { done=stop_after_jal=1; SysPrintf("Disabled speculative precompilation\n"); } @@ -7177,14 +7119,13 @@ static noinline void pass2_unneeded_regs(int istart,int iend,int r) } } } - else if(dops[i].itype==SYSCALL||dops[i].itype==HLECALL||dops[i].itype==INTCALL) + else if(dops[i].may_except) { - // SYSCALL instruction (software interrupt) + // SYSCALL instruction, etc or conditional exception u=1; } - else if(dops[i].itype==COP0 && dops[i].opcode2==0x10) + else if (dops[i].itype == RFE) { - // RFE u=1; } //u=1; // DEBUG @@ -7356,7 +7297,6 @@ static noinline void pass3_register_alloc(u_int addr) delayslot_alloc(¤t,i+1); //current.isconst=0; // DEBUG ds=1; - //printf("i=%d, isconst=%x\n",i,current.isconst); break; case RJUMP: //current.isconst=0; @@ -7472,13 +7412,8 @@ static noinline void pass3_register_alloc(u_int addr) //current.isconst=0; break; case SJUMP: - //current.isconst=0; - //current.wasconst=0; - //regs[i].wasconst=0; clear_const(¤t,dops[i].rs1); clear_const(¤t,dops[i].rt1); - //if((dops[i].opcode2&0x1E)==0x0) // BLTZ/BGEZ - if((dops[i].opcode2&0x0E)==0x0) // BLTZ/BGEZ { alloc_cc(¤t,i); dirty_reg(¤t,CCREG); @@ -7486,9 +7421,6 @@ static noinline void pass3_register_alloc(u_int addr) if (dops[i].rt1==31) { // BLTZAL/BGEZAL alloc_reg(¤t,i,31); dirty_reg(¤t,31); - //#ifdef REG_PREFETCH - //alloc_reg(¤t,i,PTEMP); - //#endif } if((dops[i].rs1&&(dops[i].rs1==dops[i+1].rt1||dops[i].rs1==dops[i+1].rt2)) // The delay slot overwrites the branch condition. ||(dops[i].rt1==31&&(dops[i+1].rs1==31||dops[i+1].rs2==31||dops[i+1].rt1==31||dops[i+1].rt2==31))) { // DS touches $ra @@ -7504,17 +7436,6 @@ static noinline void pass3_register_alloc(u_int addr) delayslot_alloc(¤t,i+1); } } - else - // Don't alloc the delay slot yet because we might not execute it - if((dops[i].opcode2&0x1E)==0x2) // BLTZL/BGEZL - { - current.isconst=0; - current.wasconst=0; - regs[i].wasconst=0; - alloc_cc(¤t,i); - dirty_reg(¤t,CCREG); - alloc_reg(¤t,i,dops[i].rs1); - } ds=1; //current.isconst=0; break; @@ -7547,14 +7468,12 @@ static noinline void pass3_register_alloc(u_int addr) case COP0: cop0_alloc(¤t,i); break; - case COP1: + case RFE: + rfe_alloc(¤t,i); break; case COP2: cop2_alloc(¤t,i); break; - case C1LS: - c1ls_alloc(¤t,i); - break; case C2LS: c2ls_alloc(¤t,i); break; @@ -7746,8 +7665,6 @@ static noinline void pass3_register_alloc(u_int addr) } break; case SJUMP: - //if((dops[i-1].opcode2&0x1E)==0) // BLTZ/BGEZ - if((dops[i-1].opcode2&0x0E)==0) // BLTZ/BGEZ { alloc_cc(¤t,i-1); dirty_reg(¤t,CCREG); @@ -7771,22 +7688,8 @@ static noinline void pass3_register_alloc(u_int addr) memcpy(&branch_regs[i-1].regmap_entry,¤t.regmap,sizeof(current.regmap)); memcpy(constmap[i],constmap[i-1],sizeof(constmap[i])); } - else - // Alloc the delay slot in case the branch is taken - if((dops[i-1].opcode2&0x1E)==2) // BLTZL/BGEZL - { - memcpy(&branch_regs[i-1],¤t,sizeof(current)); - branch_regs[i-1].u=(branch_unneeded_reg[i-1]&~((1LL< 0 && (dops[i-1].is_jump || dops[i].itype == SYSCALL || dops[i].itype == HLECALL)) + if (i > 0 && (dops[i-1].is_jump || dops[i].is_exception)) { cc=0; } @@ -7940,14 +7843,9 @@ static noinline void pass4_cull_unused_regs(void) nr |= get_regm(regs[i].regmap_entry, INVCP); } } - else if(dops[i].itype==SYSCALL||dops[i].itype==HLECALL||dops[i].itype==INTCALL) + else if (dops[i].may_except) { - // SYSCALL instruction (software interrupt) - nr=0; - } - else if(dops[i].itype==COP0 && (source[i]&0x3f)==0x18) - { - // ERET instruction (return from interrupt) + // SYSCALL instruction, etc or conditional exception nr=0; } else // Non-branch @@ -8110,8 +8008,8 @@ static noinline void pass5a_preallocate1(void) if(ba[i]>=start && ba[i]<(start+i*4)) if(dops[i+1].itype==NOP||dops[i+1].itype==MOV||dops[i+1].itype==ALU ||dops[i+1].itype==SHIFTIMM||dops[i+1].itype==IMM16||dops[i+1].itype==LOAD - ||dops[i+1].itype==STORE||dops[i+1].itype==STORELR||dops[i+1].itype==C1LS - ||dops[i+1].itype==SHIFT||dops[i+1].itype==COP1 + ||dops[i+1].itype==STORE||dops[i+1].itype==STORELR + ||dops[i+1].itype==SHIFT ||dops[i+1].itype==COP2||dops[i+1].itype==C2LS||dops[i+1].itype==C2OP) { int t=(ba[i]-start)>>2; @@ -8379,9 +8277,9 @@ static noinline void pass5a_preallocate1(void) } } } - if(dops[i].itype!=STORE&&dops[i].itype!=STORELR&&dops[i].itype!=C1LS&&dops[i].itype!=SHIFT&& + if(dops[i].itype!=STORE&&dops[i].itype!=STORELR&&dops[i].itype!=SHIFT&& dops[i].itype!=NOP&&dops[i].itype!=MOV&&dops[i].itype!=ALU&&dops[i].itype!=SHIFTIMM&& - dops[i].itype!=IMM16&&dops[i].itype!=LOAD&&dops[i].itype!=COP1) + dops[i].itype!=IMM16&&dops[i].itype!=LOAD) { memcpy(f_regmap,regs[i].regmap,sizeof(f_regmap)); } @@ -8401,7 +8299,7 @@ static noinline void pass5b_preallocate2(void) if(!dops[i+1].bt) { if(dops[i].itype==ALU||dops[i].itype==MOV||dops[i].itype==LOAD||dops[i].itype==SHIFTIMM||dops[i].itype==IMM16 - ||((dops[i].itype==COP1||dops[i].itype==COP2)&&dops[i].opcode2<3)) + ||(dops[i].itype==COP2&&dops[i].opcode2<3)) { if(dops[i+1].rs1) { if((hr=get_reg(regs[i+1].regmap,dops[i+1].rs1))>=0) @@ -8437,7 +8335,7 @@ static noinline void pass5b_preallocate2(void) } // Preload target address for load instruction (non-constant) if(dops[i+1].itype==LOAD&&dops[i+1].rs1&&get_reg(regs[i+1].regmap,dops[i+1].rs1)<0) { - if((hr=get_reg(regs[i+1].regmap,dops[i+1].rt1))>=0) + if((hr=get_reg_w(regs[i+1].regmap, dops[i+1].rt1))>=0) { if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0) { @@ -8454,7 +8352,7 @@ static noinline void pass5b_preallocate2(void) } // Load source into target register if(dops[i+1].use_lt1&&get_reg(regs[i+1].regmap,dops[i+1].rs1)<0) { - if((hr=get_reg(regs[i+1].regmap,dops[i+1].rt1))>=0) + if((hr=get_reg_w(regs[i+1].regmap, dops[i+1].rt1))>=0) { if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0) { @@ -8528,10 +8426,10 @@ static noinline void pass5b_preallocate2(void) } } } - if(dops[i+1].itype==LOAD||dops[i+1].itype==LOADLR||dops[i+1].itype==STORE||dops[i+1].itype==STORELR/*||dops[i+1].itype==C1LS||||dops[i+1].itype==C2LS*/) { + if(dops[i+1].itype==LOAD||dops[i+1].itype==LOADLR||dops[i+1].itype==STORE||dops[i+1].itype==STORELR/*||dops[i+1].itype==C2LS*/) { hr = -1; if(dops[i+1].itype==LOAD) - hr=get_reg(regs[i+1].regmap,dops[i+1].rt1); + hr=get_reg_w(regs[i+1].regmap, dops[i+1].rt1); if(dops[i+1].itype==LOADLR||(dops[i+1].opcode&0x3b)==0x31||(dops[i+1].opcode&0x3b)==0x32) // LWC1/LDC1, LWC2/LDC2 hr=get_reg(regs[i+1].regmap,FTEMP); if(dops[i+1].itype==STORE||dops[i+1].itype==STORELR||(dops[i+1].opcode&0x3b)==0x39||(dops[i+1].opcode&0x3b)==0x3a) { // SWC1/SDC1/SWC2/SDC2 @@ -8808,15 +8706,9 @@ static noinline void pass6_clean_registers(int istart, int iend, int wr) } } } - else if(dops[i].itype==SYSCALL||dops[i].itype==HLECALL||dops[i].itype==INTCALL) - { - // SYSCALL instruction (software interrupt) - will_dirty_i=0; - wont_dirty_i=0; - } - else if(dops[i].itype==COP0 && (source[i]&0x3f)==0x18) + else if (dops[i].may_except) { - // ERET instruction (return from interrupt) + // SYSCALL instruction, etc or conditional exception will_dirty_i=0; wont_dirty_i=0; } @@ -8987,14 +8879,21 @@ static int new_recompile_block(u_int addr) assem_debug("NOTCOMPILED: addr = %x -> %p\n", addr, out); + if (addr & 3) { + if (addr != hack_addr) { + SysPrintf("game crash @%08x, ra=%08x\n", addr, psxRegs.GPR.n.ra); + hack_addr = addr; + } + return -1; + } + // this is just for speculation for (i = 1; i < 32; i++) { if ((psxRegs.GPR.r[i] & 0xffff0000) == 0x1f800000) state_rflags |= 1 << i; } - assert(!(addr & 3)); - start = addr & ~3; + start = addr; new_dynarec_did_compile=1; if (Config.HLE && start == 0x80001000) // hlecall { @@ -9248,32 +9147,31 @@ static int new_recompile_block(u_int addr) emit_jmp(0); } - // TODO: delay slot stubs? // Stubs - for(i=0;i 0 extern int last_count; // last absolute target, often = next_interupt -@@ -532,6 +532,7 @@ static int cycle_multiplier_active; +@@ -593,6 +593,7 @@ static int cycle_multiplier_active; static int CLOCK_ADJUST(int x) { @@ -19,7 +19,7 @@ index b160a4a..0d91999 100644 int m = cycle_multiplier_active; int s = (x >> 31) | 1; return (x * m + s * 50) / 100; -@@ -662,6 +663,9 @@ static void *try_restore_block(u_int vaddr, u_int start_page, u_int end_page) +@@ -745,6 +746,9 @@ static void *try_restore_block(u_int vaddr, u_int start_page, u_int end_page) // This is called from the recompiled JR/JALR instructions static void noinline *get_addr(u_int vaddr, int can_compile) { @@ -29,7 +29,7 @@ index b160a4a..0d91999 100644 u_int start_page = get_page_prev(vaddr); u_int i, page, end_page = get_page(vaddr); void *found_clean = NULL; -@@ -7046,7 +7050,7 @@ static noinline void pass2_unneeded_regs(int istart,int iend,int r) +@@ -7143,7 +7147,7 @@ static noinline void pass2_unneeded_regs(int istart,int iend,int r) // R0 is always unneeded u|=1; // Save it @@ -38,7 +38,7 @@ index b160a4a..0d91999 100644 gte_unneeded[i]=gte_u; /* printf("ur (%d,%d) %x: ",istart,iend,start+i*4); -@@ -8236,6 +8240,7 @@ static noinline void pass5a_preallocate1(void) +@@ -8292,6 +8296,7 @@ static noinline void pass5a_preallocate1(void) static noinline void pass5b_preallocate2(void) { int i, hr; @@ -46,7 +46,7 @@ index b160a4a..0d91999 100644 for(i=0;isubCycleStep >= 0x10000); + regs->subCycle += regs->subCycleStep; +- regs->cycle += regs->subCycle >> 16; ++ regs->cycle += 2; //regs->subCycle >> 16; + regs->subCycle &= 0xffff; + } + +@@ -1341,8 +1341,14 @@ static void intShutdown() { + + // single step (may do several ops in case of a branch or load delay) + void execI(psxRegisters *regs) { ++ extern int last_count; ++ void do_insn_cmp(void); ++ printf("execI %08x c %u, ni %u\n", regs->pc, regs->cycle, next_interupt); ++ last_count = 0; + do { + execIbp(psxMemRLUT, regs); ++ if (regs->dloadReg[0] || regs->dloadReg[1]) ++ do_insn_cmp(); + } while (regs->dloadReg[0] || regs->dloadReg[1]); + } + diff --git a/libpcsxcore/new_dynarec/patches/trace_intr b/libpcsxcore/new_dynarec/patches/trace_intr index c3f4cf13..40b3edb7 100644 --- a/libpcsxcore/new_dynarec/patches/trace_intr +++ b/libpcsxcore/new_dynarec/patches/trace_intr @@ -1,12 +1,13 @@ diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c -index 10d99ba..1e097ae 100644 +index 89716fa0..02a8d7c5 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c -@@ -405,13 +407,17 @@ static void ari64_shutdown() +@@ -320,13 +320,18 @@ static void ari64_shutdown() { new_dynarec_cleanup(); new_dyna_pcsx_mem_shutdown(); + (void)ari64_execute; ++ (void)ari64_execute_block; } +extern void intExecuteT(); @@ -16,13 +17,13 @@ index 10d99ba..1e097ae 100644 ari64_init, ari64_reset, - ari64_execute, -- ari64_execute_until, +- ari64_execute_block, + intExecuteT, + intExecuteBlockT, ari64_clear, ari64_notify, ari64_apply_config, -@@ -481,7 +487,7 @@ static u32 memcheck_read(u32 a) +@@ -395,7 +400,7 @@ static u32 memcheck_read(u32 a) return *(u32 *)(psxM + (a & 0x1ffffc)); } @@ -32,23 +33,23 @@ index 10d99ba..1e097ae 100644 { static psxRegisters oldregs; diff --git a/libpcsxcore/new_dynarec/pcsxmem.c b/libpcsxcore/new_dynarec/pcsxmem.c -index bb471b6..8f68a3b 100644 +index 190f8fc7..5feb7a02 100644 --- a/libpcsxcore/new_dynarec/pcsxmem.c +++ b/libpcsxcore/new_dynarec/pcsxmem.c -@@ -272,6 +272,8 @@ static void write_biu(u32 value) - if (address != 0xfffe0130) +@@ -289,6 +289,8 @@ static void write_biu(u32 value) return; + } +extern u32 handler_cycle; +handler_cycle = psxRegs.cycle; - switch (value) { - case 0x800: case 0x804: - unmap_ram_write(); + memprintf("write_biu %08x @%08x %u\n", value, psxRegs.pc, psxRegs.cycle); + psxRegs.biuReg = value; + } diff --git a/libpcsxcore/psxcounters.c b/libpcsxcore/psxcounters.c -index ff0efbc..4459644 100644 +index 18bd6a4e..bc2eb3f6 100644 --- a/libpcsxcore/psxcounters.c +++ b/libpcsxcore/psxcounters.c -@@ -379,9 +379,12 @@ void psxRcntUpdate() +@@ -389,9 +389,12 @@ void psxRcntUpdate() /******************************************************************************/ @@ -61,7 +62,7 @@ index ff0efbc..4459644 100644 _psxRcntWcount( index, value ); psxRcntSet(); -@@ -390,6 +393,7 @@ void psxRcntWcount( u32 index, u32 value ) +@@ -400,6 +403,7 @@ void psxRcntWcount( u32 index, u32 value ) void psxRcntWmode( u32 index, u32 value ) { verboseLog( 1, "[RCNT %i] wmode: %x\n", index, value ); @@ -69,7 +70,7 @@ index ff0efbc..4459644 100644 _psxRcntWmode( index, value ); _psxRcntWcount( index, 0 ); -@@ -401,6 +405,7 @@ void psxRcntWmode( u32 index, u32 value ) +@@ -411,6 +415,7 @@ void psxRcntWmode( u32 index, u32 value ) void psxRcntWtarget( u32 index, u32 value ) { verboseLog( 1, "[RCNT %i] wtarget: %x\n", index, value ); @@ -77,7 +78,7 @@ index ff0efbc..4459644 100644 rcnts[index].target = value; -@@ -413,6 +418,7 @@ void psxRcntWtarget( u32 index, u32 value ) +@@ -423,6 +428,7 @@ void psxRcntWtarget( u32 index, u32 value ) u32 psxRcntRcount( u32 index ) { u32 count; @@ -86,10 +87,10 @@ index ff0efbc..4459644 100644 count = _psxRcntRcount( index ); diff --git a/libpcsxcore/psxhw.c b/libpcsxcore/psxhw.c -index dbcb989..0716f5e 100644 +index 27ddfeab..d7c6ff05 100644 --- a/libpcsxcore/psxhw.c +++ b/libpcsxcore/psxhw.c -@@ -373,13 +373,14 @@ void psxHwWrite8(u32 add, u8 value) { +@@ -377,13 +377,14 @@ void psxHwWrite8(u32 add, u8 value) { case 0x1f801803: cdrWrite3(value); break; default: @@ -105,7 +106,7 @@ index dbcb989..0716f5e 100644 #ifdef PSXHW_LOG PSXHW_LOG("*Known 8bit write at address %x value %x\n", add, value); #endif -@@ -504,6 +505,7 @@ void psxHwWrite16(u32 add, u16 value) { +@@ -506,6 +507,7 @@ void psxHwWrite16(u32 add, u16 value) { return; } @@ -113,7 +114,7 @@ index dbcb989..0716f5e 100644 psxHu16ref(add) = SWAPu16(value); #ifdef PSXHW_LOG PSXHW_LOG("*Unknown 16bit write at address %x value %x\n", add, value); -@@ -699,9 +701,9 @@ void psxHwWrite32(u32 add, u32 value) { +@@ -701,9 +703,9 @@ void psxHwWrite32(u32 add, u32 value) { return; case 0x1f801820: @@ -125,7 +126,7 @@ index dbcb989..0716f5e 100644 case 0x1f801100: #ifdef PSXHW_LOG -@@ -759,6 +761,7 @@ void psxHwWrite32(u32 add, u32 value) { +@@ -761,6 +763,7 @@ void psxHwWrite32(u32 add, u32 value) { return; } @@ -134,94 +135,99 @@ index dbcb989..0716f5e 100644 #ifdef PSXHW_LOG PSXHW_LOG("*Unknown 32bit write at address %x value %x\n", add, value); diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c -index e7e3269..8f4004d 100644 +index be15f782..6f07478f 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c -@@ -467,6 +467,8 @@ static void doBranch(u32 tar) { - psxRegs.pc += 4; - psxRegs.cycle += BIAS; - -+ (void)tmp; -+#if 0 - // check for load delay - tmp = psxRegs.code >> 26; - switch (tmp) { -@@ -500,13 +502,15 @@ static void doBranch(u32 tar) { - } - break; - } -- -+#endif - psxBSC[psxRegs.code >> 26](); +@@ -237,7 +237,7 @@ static inline void addCycle(psxRegisters *regs) + { + assert(regs->subCycleStep >= 0x10000); + regs->subCycle += regs->subCycleStep; +- regs->cycle += regs->subCycle >> 16; ++ regs->cycle += 2; //regs->subCycle >> 16; + regs->subCycle &= 0xffff; + } - branch = 0; - psxRegs.pc = branchPC; +@@ -434,7 +434,9 @@ static void doBranch(psxRegisters *regs, u32 tar, enum R3000Abdt taken) { + regs->CP0.n.Target = pc_final; + regs->branching = 0; -+ psxRegs.cycle += BIAS; ++ psxRegs.cycle += 2; psxBranchTest(); -+ psxRegs.cycle -= BIAS; ++ psxRegs.cycle -= 2; } - /********************************************************* -@@ -616,12 +620,13 @@ void psxMULTU_stall() { - psxMULTU(); + static void doBranchReg(psxRegisters *regs, u32 tar) { +@@ -967,7 +969,7 @@ void MTC0(psxRegisters *regs_, int reg, u32 val) { + } } -+#define doBranchNotTaken() do { psxRegs.cycle += BIAS; execI(); psxBranchTest(); psxRegs.cycle -= BIAS; } while(0) - /********************************************************* - * Register branch logic * - * Format: OP rs, offset * - *********************************************************/ --#define RepZBranchi32(op) if(_i32(_rRs_) op 0) doBranch(_BranchTarget_); --#define RepZBranchLinki32(op) { _SetLink(31); if(_i32(_rRs_) op 0) { doBranch(_BranchTarget_); } } -+#define RepZBranchi32(op) if(_i32(_rRs_) op 0) doBranch(_BranchTarget_); else doBranchNotTaken(); -+#define RepZBranchLinki32(op) { _SetLink(31); if(_i32(_rRs_) op 0) { doBranch(_BranchTarget_); } else doBranchNotTaken(); } - - void psxBGEZ() { RepZBranchi32(>=) } // Branch if Rs >= 0 - void psxBGEZAL() { RepZBranchLinki32(>=) } // Branch if Rs >= 0 and link -@@ -703,7 +708,7 @@ void psxRFE() { - * Register branch logic * - * Format: OP rs, rt, offset * - *********************************************************/ --#define RepBranchi32(op) if(_i32(_rRs_) op _i32(_rRt_)) doBranch(_BranchTarget_); -+#define RepBranchi32(op) if(_i32(_rRs_) op _i32(_rRt_)) doBranch(_BranchTarget_); else doBranchNotTaken(); - - void psxBEQ() { RepBranchi32(==) } // Branch if Rs == Rt - void psxBNE() { RepBranchi32(!=) } // Branch if Rs != Rt -@@ -901,7 +907,7 @@ void MTC0(int reg, u32 val) { - } +-OP(psxMTC0) { MTC0(regs_, _Rd_, _u32(_rRt_)); } ++OP(psxMTC0) { MTC0(regs_, _Rd_, _u32(_rRt_)); psxBranchTest(); } + + // no exception + static inline void psxNULLne(psxRegisters *regs) { +@@ -1175,18 +1177,19 @@ static void intReset() { + static inline void execI_(u8 **memRLUT, psxRegisters *regs) { + u32 pc = regs->pc; + +- addCycle(regs); ++ //addCycle(regs); + dloadStep(regs); + + regs->pc += 4; + regs->code = fetch(regs, memRLUT, pc); + psxBSC[regs->code >> 26](regs, regs->code); ++ psxRegs.cycle += 2; } --void psxMTC0() { MTC0(_Rd_, _u32(_rRt_)); } -+void psxMTC0() { MTC0(_Rd_, _u32(_rRt_)); psxBranchTest(); } - void psxCTC0() { MTC0(_Rd_, _u32(_rRt_)); } + static inline void execIbp(u8 **memRLUT, psxRegisters *regs) { + u32 pc = regs->pc; + +- addCycle(regs); ++ //addCycle(regs); + dloadStep(regs); + + if (execBreakCheck(regs, pc)) +@@ -1195,6 +1198,7 @@ static inline void execIbp(u8 **memRLUT, psxRegisters *regs) { + regs->pc += 4; + regs->code = fetch(regs, memRLUT, pc); + psxBSC[regs->code >> 26](regs, regs->code); ++ psxRegs.cycle += 2; + } - /********************************************************* -@@ -1028,6 +1034,23 @@ void intExecuteBlock() { - while (!branch2) execI(); + static void intExecute() { +@@ -1224,6 +1228,30 @@ void intExecuteBlock(enum blockExecCaller caller) { + execI_(memRLUT, regs_); } +extern void do_insn_trace(void); + +void intExecuteT() { -+ for (;;) { ++ psxRegisters *regs_ = &psxRegs; ++ u8 **memRLUT = psxMemRLUT; ++ extern int stop; ++ ++ while (!stop) { + do_insn_trace(); -+ execI(); ++ execIbp(memRLUT, regs_); + } +} + +void intExecuteBlockT() { -+ branch2 = 0; -+ while (!branch2) { ++ psxRegisters *regs_ = &psxRegs; ++ u8 **memRLUT = psxMemRLUT; ++ ++ branchSeen = 0; ++ while (!branchSeen) { + do_insn_trace(); -+ execI(); ++ execIbp(memRLUT, regs_); + } +} + static void intClear(u32 Addr, u32 Size) { } -@@ -1050,7 +1073,7 @@ void intApplyConfig() { +@@ -1271,7 +1299,7 @@ void intApplyConfig() { assert(psxSPC[26] == psxDIV || psxSPC[26] == psxDIV_stall); assert(psxSPC[27] == psxDIVU || psxSPC[27] == psxDIVU_stall); @@ -230,28 +236,16 @@ index e7e3269..8f4004d 100644 psxBSC[18] = psxCOP2; psxBSC[50] = gteLWC2; psxBSC[58] = gteSWC2; -@@ -1092,9 +1115,10 @@ void execI() { - if (Config.Debug) ProcessDebug(); - - psxRegs.pc += 4; -- psxRegs.cycle += BIAS; - - psxBSC[psxRegs.code >> 26](); -+ -+ psxRegs.cycle += BIAS; - } - - R3000Acpu psxInt = { diff --git a/libpcsxcore/psxmem.c b/libpcsxcore/psxmem.c -index 46cee0c..c814587 100644 +index 54219ae0..41168ced 100644 --- a/libpcsxcore/psxmem.c +++ b/libpcsxcore/psxmem.c -@@ -218,11 +218,13 @@ void psxMemShutdown() { +@@ -278,10 +278,13 @@ void psxMemOnIsolate(int enable) + : R3000ACPU_NOTIFY_CACHE_UNISOLATED, NULL); } - static int writeok = 1; +extern u32 last_io_addr; - ++ u8 psxMemRead8(u32 mem) { char *p; u32 t; @@ -260,7 +254,7 @@ index 46cee0c..c814587 100644 t = mem >> 16; if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { if ((mem & 0xffff) < 0x400) -@@ -248,6 +250,7 @@ u16 psxMemRead16(u32 mem) { +@@ -307,6 +310,7 @@ u16 psxMemRead16(u32 mem) { char *p; u32 t; @@ -268,7 +262,7 @@ index 46cee0c..c814587 100644 t = mem >> 16; if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { if ((mem & 0xffff) < 0x400) -@@ -273,6 +276,7 @@ u32 psxMemRead32(u32 mem) { +@@ -332,6 +336,7 @@ u32 psxMemRead32(u32 mem) { char *p; u32 t; @@ -276,7 +270,7 @@ index 46cee0c..c814587 100644 t = mem >> 16; if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { if ((mem & 0xffff) < 0x400) -@@ -298,6 +302,7 @@ void psxMemWrite8(u32 mem, u8 value) { +@@ -359,6 +364,7 @@ void psxMemWrite8(u32 mem, u8 value) { char *p; u32 t; @@ -284,7 +278,7 @@ index 46cee0c..c814587 100644 t = mem >> 16; if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { if ((mem & 0xffff) < 0x400) -@@ -325,6 +330,7 @@ void psxMemWrite16(u32 mem, u16 value) { +@@ -386,6 +392,7 @@ void psxMemWrite16(u32 mem, u16 value) { char *p; u32 t; @@ -292,7 +286,7 @@ index 46cee0c..c814587 100644 t = mem >> 16; if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { if ((mem & 0xffff) < 0x400) -@@ -352,6 +358,7 @@ void psxMemWrite32(u32 mem, u32 value) { +@@ -413,6 +420,7 @@ void psxMemWrite32(u32 mem, u32 value) { char *p; u32 t; @@ -300,20 +294,20 @@ index 46cee0c..c814587 100644 // if ((mem&0x1fffff) == 0x71E18 || value == 0x48088800) SysPrintf("t2fix!!\n"); t = mem >> 16; if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) { -@@ -381,6 +388,8 @@ void psxMemWrite32(u32 mem, u32 value) { - } else { - int i; - +@@ -431,6 +439,8 @@ void psxMemWrite32(u32 mem, u32 value) { + #endif + } else { + if (mem == 0xfffe0130) { +extern u32 handler_cycle; +handler_cycle = psxRegs.cycle; - switch (value) { - case 0x800: case 0x804: - if (writeok == 0) break; + psxRegs.biuReg = value; + return; + } diff --git a/libpcsxcore/r3000a.c b/libpcsxcore/r3000a.c -index 7e6f16b..0114947 100644 +index dffbf6e7..0a3bdb65 100644 --- a/libpcsxcore/r3000a.c +++ b/libpcsxcore/r3000a.c -@@ -120,6 +120,8 @@ void psxException(u32 code, u32 bd) { +@@ -124,6 +124,8 @@ void psxException(u32 cause, enum R3000Abdt bdt, psxCP0Regs *cp0) { } void psxBranchTest() { diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c index be15f782..e212d8a9 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -454,14 +454,6 @@ static void doBranchRegE(psxRegisters *regs, u32 tar) { doBranch(regs, tar, R3000A_BRANCH_TAKEN); } -#if __has_builtin(__builtin_add_overflow) || (defined(__GNUC__) && __GNUC__ >= 5) -#define add_overflow(a, b, r) __builtin_add_overflow(a, b, &(r)) -#define sub_overflow(a, b, r) __builtin_sub_overflow(a, b, &(r)) -#else -#define add_overflow(a, b, r) ({r = (u32)a + (u32)b; (a ^ ~b) & (a ^ r) & (1u<<31);}) -#define sub_overflow(a, b, r) ({r = (u32)a - (u32)b; (a ^ b) & (a ^ r) & (1u<<31);}) -#endif - static void addExc(psxRegisters *regs, u32 rt, s32 a1, s32 a2) { s32 val; if (add_overflow(a1, a2, val)) { @@ -1344,12 +1336,14 @@ void intApplyConfig() { } static void intShutdown() { + dloadClear(&psxRegs); } -// single step (may do several ops in case of a branch) +// single step (may do several ops in case of a branch or load delay) void execI(psxRegisters *regs) { - execI_(psxMemRLUT, regs); - dloadFlush(regs); + do { + execIbp(psxMemRLUT, regs); + } while (regs->dloadReg[0] || regs->dloadReg[1]); } R3000Acpu psxInt = { -- 2.39.2