static int ds_writes_rjump_rs(int i)
{
- return dops[i].rs1 != 0 && (dops[i].rs1 == dops[i+1].rt1 || dops[i].rs1 == dops[i+1].rt2);
+ return dops[i].rs1 != 0
+ && (dops[i].rs1 == dops[i+1].rt1 || dops[i].rs1 == dops[i+1].rt2
+ || dops[i].rs1 == dops[i].rt1); // overwrites itself - same effect
}
// psx addr mirror masking (for invalidation)
return ndrc_get_addr_ht(vaddr);
// generate an address error
+#ifdef DRC_DBG
+ last_count -= 2;
+#endif
psxRegs.CP0.n.Cause &= 0x300;
- psxRegs.CP0.n.Cause |= R3000E_AdEL << 2;
psxRegs.CP0.n.EPC = vaddr;
+ if (vaddr & 3) {
+ psxRegs.CP0.n.Cause |= R3000E_AdEL << 2;
+ psxRegs.CP0.n.BadVAddr = vaddr;
+ } else
+ psxRegs.CP0.n.Cause |= R3000E_IBE << 2;
psxRegs.pc = 0x80000080;
return ndrc_get_addr_ht(0x80000080);
}
}
hr++;
}
- if((dops[i].opcode&0x2E)==6) // BLEZ/BGTZ needs another register
+ if ((dops[i].opcode & 0x3e) == 6) // BLEZ/BGTZ needs another register
{
while(hr<HOST_REGS)
{
}
assert(hr<HOST_REGS);
}
- if((dops[i].opcode&0x2f)==4) // BEQ
+ if (dops[i].opcode == 4) // BEQ
{
#ifdef HAVE_CMOV_IMM
if(s2l>=0) emit_cmp(s1l,s2l);
emit_cmovne_reg(alt,addr);
#endif
}
- if((dops[i].opcode&0x2f)==5) // BNE
+ else if (dops[i].opcode == 5) // BNE
{
#ifdef HAVE_CMOV_IMM
if(s2l>=0) emit_cmp(s1l,s2l);
emit_cmovne_reg(alt,addr);
#endif
}
- if((dops[i].opcode&0x2f)==6) // BLEZ
+ else if (dops[i].opcode == 6) // BLEZ
{
//emit_movimm(cinfo[i].ba,alt);
//emit_movimm(start+i*4+8,addr);
emit_cmpimm(s1l,1);
emit_cmovl_reg(alt,addr);
}
- if((dops[i].opcode&0x2f)==7) // BGTZ
+ else if (dops[i].opcode == 7) // BGTZ
{
//emit_movimm(cinfo[i].ba,addr);
//emit_movimm(start+i*4+8,ntaddr);
emit_cmpimm(s1l,1);
emit_cmovl_reg(ntaddr,addr);
}
- if((dops[i].opcode==1)&&(dops[i].opcode2&0x2D)==0) // BLTZ
+ else if (dops[i].itype == SJUMP) // BLTZ/BGEZ
{
//emit_movimm(cinfo[i].ba,alt);
//emit_movimm(start+i*4+8,addr);
- emit_mov2imm_compact(cinfo[i].ba,alt,start+i*4+8,addr);
+ emit_mov2imm_compact(cinfo[i].ba,
+ (dops[i].opcode2 & 1) ? addr : alt, start + i*4 + 8,
+ (dops[i].opcode2 & 1) ? alt : addr);
emit_test(s1l,s1l);
emit_cmovs_reg(alt,addr);
}
- if((dops[i].opcode==1)&&(dops[i].opcode2&0x2D)==1) // BGEZ
- {
- //emit_movimm(cinfo[i].ba,addr);
- //emit_movimm(start+i*4+8,alt);
- emit_mov2imm_compact(cinfo[i].ba,addr,start+i*4+8,alt);
- emit_test(s1l,s1l);
- emit_cmovs_reg(alt,addr);
- }
- if(dops[i].opcode==0x11 && dops[i].opcode2==0x08 ) {
- if(source[i]&0x10000) // BC1T
- {
- //emit_movimm(cinfo[i].ba,alt);
- //emit_movimm(start+i*4+8,addr);
- emit_mov2imm_compact(cinfo[i].ba,alt,start+i*4+8,addr);
- emit_testimm(s1l,0x800000);
- emit_cmovne_reg(alt,addr);
- }
- else // BC1F
- {
- //emit_movimm(cinfo[i].ba,addr);
- //emit_movimm(start+i*4+8,alt);
- emit_mov2imm_compact(cinfo[i].ba,addr,start+i*4+8,alt);
- emit_testimm(s1l,0x800000);
- emit_cmovne_reg(alt,addr);
- }
- }
- emit_writeword(addr,&pcaddr);
+ emit_writeword(addr, &pcaddr);
}
else
if(dops[i].itype==RJUMP)
if(adj&&!invert) emit_addimm(cc, cinfo[i].ccadj + CLOCK_ADJUST(2) - adj, cc);
{
assert(s1l>=0);
- if((dops[i].opcode2&0xf)==0) // BLTZ/BLTZAL
+ if ((dops[i].opcode2 & 1) == 0) // BLTZ/BLTZAL
{
emit_test(s1l,s1l);
if(invert){
emit_js(0);
}
}
- if((dops[i].opcode2&0xf)==1) // BGEZ/BLTZAL
+ else // BGEZ/BGEZAL
{
emit_test(s1l,s1l);
if(invert){
// In-order execution (branch first)
//printf("IOE\n");
void *nottaken = NULL;
- if(dops[i].rt1==31) {
- int rt,return_address;
- rt=get_reg(branch_regs[i].regmap,31);
- if(rt>=0) {
+ if (!unconditional) {
+ assert(s1l >= 0);
+ emit_test(s1l, s1l);
+ }
+ if (dops[i].rt1 == 31) {
+ int rt, return_address;
+ rt = get_reg(branch_regs[i].regmap,31);
+ if(rt >= 0) {
// Save the PC even if the branch is not taken
- return_address=start+i*4+8;
- emit_movimm(return_address,rt); // PC into link register
+ return_address = start + i*4+8;
+ emit_movimm(return_address, rt); // PC into link register
#ifdef IMM_PREFETCH
emit_prefetch(hash_table_get(return_address));
#endif
}
}
- if(!unconditional) {
- //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
- assert(s1l>=0);
- if((dops[i].opcode2&0x0d)==0) // BLTZ/BLTZL/BLTZAL/BLTZALL
- {
- emit_test(s1l,s1l);
- nottaken=out;
- emit_jns(DJT_1);
- }
- if((dops[i].opcode2&0x0d)==1) // BGEZ/BGEZL/BGEZAL/BGEZALL
- {
- emit_test(s1l,s1l);
- nottaken=out;
- emit_js(DJT_1);
- }
- } // if(!unconditional)
+ if (!unconditional) {
+ nottaken = out;
+ if (!(dops[i].opcode2 & 1)) // BLTZ/BLTZAL
+ emit_jns(DJT_1);
+ else // BGEZ/BGEZAL
+ emit_js(DJT_1);
+ }
int adj;
uint64_t ds_unneeded=branch_regs[i].u;
ds_unneeded&=~((1LL<<dops[i+1].rs1)|(1LL<<dops[i+1].rs2));
case SJUMP:
printf (" %x: %s r%d,%8x\n",start+i*4,insn[i],dops[i].rs1,start+i*4+4+((signed int)((unsigned int)source[i]<<16)>>14));break;
case RJUMP:
- if (dops[i].opcode==0x9&&dops[i].rt1!=31)
+ if (dops[i].opcode2 == 9 && dops[i].rt1 != 31)
printf (" %x: %s r%d,r%d\n",start+i*4,insn[i],dops[i].rt1,dops[i].rs1);
else
printf (" %x: %s r%d\n",start+i*4,insn[i],dops[i].rs1);
for (i = 0; !done; i++)
{
- int force_prev_to_interpreter = 0;
+ int force_j_to_interpreter = 0;
memset(&dops[i], 0, sizeof(dops[i]));
memset(&cinfo[i], 0, sizeof(cinfo[i]));
cinfo[i].ba = -1;
/* rare messy cases to just pass over to the interpreter */
if (i > 0 && dops[i-1].is_jump) {
+ j = i - 1;
// branch in delay slot?
if (dops[i].is_jump) {
// don't handle first branch and call interpreter if it's hit
SysPrintf("branch in DS @%08x (%08x)\n", start + i*4, start);
- force_prev_to_interpreter = 1;
+ force_j_to_interpreter = 1;
}
// basic load delay detection through a branch
else if (dops[i].is_delay_load && dops[i].rt1 != 0) {
if(0 <= t && t < i &&(dops[i].rt1==dops[t].rs1||dops[i].rt1==dops[t].rs2)&&dops[t].itype!=CJUMP&&dops[t].itype!=SJUMP) {
// jump target wants DS result - potential load delay effect
SysPrintf("load delay in DS @%08x (%08x)\n", start + i*4, start);
- force_prev_to_interpreter = 1;
+ force_j_to_interpreter = 1;
dops[t+1].bt=1; // expected return from interpreter
}
else if(i>=2&&dops[i-2].rt1==2&&dops[i].rt1==2&&dops[i].rs1!=2&&dops[i].rs2!=2&&dops[i-1].rs1!=2&&dops[i-1].rs2!=2&&
!(i>=3&&dops[i-3].is_jump)) {
// v0 overwrite like this is a sign of trouble, bail out
SysPrintf("v0 overwrite @%08x (%08x)\n", start + i*4, start);
- force_prev_to_interpreter = 1;
+ force_j_to_interpreter = 1;
}
}
}
else if (i > 0 && dops[i-1].is_delay_load && dops[i-1].rt1 != 0
&& (dops[i].rs1 == dops[i-1].rt1 || dops[i].rs2 == dops[i-1].rt1)) {
SysPrintf("load delay @%08x (%08x)\n", start + i*4, start);
- force_prev_to_interpreter = 1;
+ for (j = i - 1; j > 0 && dops[j-1].is_delay_load; j--)
+ if (dops[j-1].rt1 != dops[i-1].rt1)
+ break;
+ force_j_to_interpreter = 1;
}
- if (force_prev_to_interpreter) {
- memset(&dops[i-1], 0, sizeof(dops[i-1]));
- dops[i-1].itype = INTCALL;
- dops[i-1].rs1 = CCREG;
- cinfo[i-1].ba = -1;
+ if (force_j_to_interpreter) {
+ memset(&dops[j], 0, sizeof(dops[j]));
+ dops[j].itype = INTCALL;
+ dops[j].rs1 = CCREG;
+ cinfo[j].ba = -1;
done = 2;
- i--; // don't compile the DS/problematic load/etc
+ i = j; // don't compile the problematic branch/load/etc
}
/* Is this the end of the block? */
alloc_cc(&current,i);
dirty_reg(&current,CCREG);
alloc_reg(&current,i,dops[i].rs1);
- if (dops[i].rt1==31) { // BLTZAL/BGEZAL
+ if (dops[i].rt1 == 31) { // BLTZAL/BGEZAL
alloc_reg(&current,i,31);
dirty_reg(&current,31);
}
- if((dops[i].rs1&&(dops[i].rs1==dops[i+1].rt1||dops[i].rs1==dops[i+1].rt2)) // The delay slot overwrites the branch condition.
+ if ((dops[i].rs1 &&
+ (dops[i].rs1==dops[i+1].rt1||dops[i].rs1==dops[i+1].rt2)) // The delay slot overwrites the branch condition.
+ ||(dops[i].rt1 == 31 && dops[i].rs1 == 31) // overwrites its own condition
||(dops[i].rt1==31&&(dops[i+1].rs1==31||dops[i+1].rs2==31||dops[i+1].rt1==31||dops[i+1].rt2==31))) { // DS touches $ra
// Allocate the branch condition registers instead.
current.isconst=0;
diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c
-index b71f8a8f..0a26f6f6 100644
+index 2d3348e8..a85d2cd4 100644
--- a/libpcsxcore/new_dynarec/new_dynarec.c
+++ b/libpcsxcore/new_dynarec/new_dynarec.c
@@ -318,7 +318,7 @@ static struct compile_info
int m = cycle_multiplier_active;
int s = (x >> 31) | 1;
return (x * m + s * 50) / 100;
-@@ -750,6 +751,9 @@ static void *try_restore_block(u_int vaddr, u_int start_page, u_int end_page)
+@@ -752,6 +753,9 @@ static void *try_restore_block(u_int vaddr, u_int start_page, u_int end_page)
// This is called from the recompiled JR/JALR instructions
static void noinline *get_addr(u_int vaddr, int can_compile)
{
u_int start_page = get_page_prev(vaddr);
u_int i, page, end_page = get_page(vaddr);
void *found_clean = NULL;
-@@ -7180,7 +7184,7 @@ static noinline void pass2_unneeded_regs(int istart,int iend,int r)
+@@ -7164,7 +7168,7 @@ static noinline void pass2_unneeded_regs(int istart,int iend,int r)
// R0 is always unneeded
u|=1;
// Save it
gte_unneeded[i]=gte_u;
/*
printf("ur (%d,%d) %x: ",istart,iend,start+i*4);
-@@ -8329,6 +8333,7 @@ static noinline void pass5a_preallocate1(void)
+@@ -8315,6 +8319,7 @@ static noinline void pass5a_preallocate1(void)
static noinline void pass5b_preallocate2(void)
{
int i, hr;
for(i=0;i<slen-1;i++)
{
if (!i || !dops[i-1].is_jump)
-@@ -9138,6 +9143,14 @@ static int new_recompile_block(u_int addr)
+@@ -9124,6 +9129,14 @@ static int new_recompile_block(u_int addr)
load_reg(regs[i].regmap_entry,regs[i].regmap,INVCP);
ds = assemble(i, &regs[i], cinfo[i].ccadj);
+#ifdef DRC_DBG
+ // write-out non-consts, consts are likely different because of get_final_value()
-+ if (!dops[i].is_jump && (regs[i].dirty&~regs[i].loadedconst)) {
++ if (regs[i].dirty & ~regs[i].loadedconst) {
+ assem_debug("/ drc_dbg_wb\n");
-+ wb_dirtys(regs[i].regmap,regs[i].dirty&~regs[i].loadedconst);
++ wb_dirtys(regs[i].regmap, regs[i].dirty & ~regs[i].loadedconst);
+ assem_debug("\\ drc_dbg_wb\n");
+ }
+#endif
if (dops[i].is_ujump)
literal_pool(1024);
-@@ -9330,6 +9343,10 @@ static int new_recompile_block(u_int addr)
+@@ -9316,6 +9329,10 @@ static int new_recompile_block(u_int addr)
#ifdef ASSEM_PRINT
fflush(stdout);