uint64_t uu;
u_int wasconst;
u_int isconst;
+ u_int waswritten; // regs that were used as store base before
uint64_t constmap[HOST_REGS];
};
jaddr=0;
}
#endif
- if(!using_tlb) {
+ if(!using_tlb&&!(i_regs->waswritten&(1<<rs1[i]))) {
if(!c||memtarget) {
#ifdef DESTRUCTIVE_SHIFT
// The x86 shift operation is 'destructive'; it overwrites the
}
if(!c||!memtarget)
add_stub(STORELR_STUB,jaddr,(int)out,i,(int)i_regs,temp,ccadj[i],reglist);
- if(!using_tlb) {
+ if(!using_tlb&&!(i_regs->waswritten&(1<<rs1[i]))) {
#ifdef RAM_OFFSET
int map=get_reg(i_regs->regmap,ROREG);
if(map<0) map=HOST_TEMPREG;
emit_writedword_indexed_tlb(th,tl,0,offset||c||s<0?temp:s,map,temp);
type=STORED_STUB;
}
- if(!using_tlb) {
+ if(!using_tlb&&!(i_regs->waswritten&(1<<rs1[i]))) {
if (opcode[i]==0x39||opcode[i]==0x3D) { // SWC1/SDC1
#ifndef DESTRUCTIVE_SHIFT
temp=offset||c||s<0?ar:s;
}
if(jaddr2)
add_stub(type,jaddr2,(int)out,i,ar,(int)i_regs,ccadj[i],reglist);
- if (opcode[i]==0x3a) { // SWC2
+ if (!(i_regs->waswritten&(1<<rs1[i]))&&opcode[i]==0x3a) { // SWC2
#if defined(HOST_IMM8)
int ir=get_reg(i_regs->regmap,INVCP);
assert(ir>=0);
//assert(adj==0);
emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG);
add_stub(CC_STUB,(int)out,jump_vaddr_reg[rs],0,i,-1,TAKEN,0);
+#ifdef PCSX
+ if(itype[i+1]==COP0&&(source[i+1]&0x3f)==0x10)
+ // special case for RFE
+ emit_jmp(0);
+ else
+#endif
emit_jns(0);
//load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,-1);
#ifdef USE_MINI_HT
rs2[i]=0;
rt1[i]=0;
rt2[i]=0;
- gte_rt[i]=1ll<<63; // every op changes flags
- // TODO: other regs?
+ gte_rs[i]=gte_reg_reads[source[i]&0x3f];
+ gte_rt[i]=gte_reg_writes[source[i]&0x3f];
+ gte_rt[i]|=1ll<<63; // every op changes flags
break;
case FLOAT:
case FCONV:
dirty_reg(¤t,CCREG);
current.isconst=0;
current.wasconst=0;
+ current.waswritten=0;
int ds=0;
int cc=0;
int hr=-1;
if(current.regmap[hr]==0) current.regmap[hr]=-1;
}
current.isconst=0;
+ current.waswritten=0;
}
if(i>1)
{
}
memcpy(regs[i].regmap,current.regmap,sizeof(current.regmap));
}
+
+ if(i>0&&(itype[i-1]==STORE||itype[i-1]==STORELR||(itype[i-1]==C2LS&&opcode[i-1]==0x3a))&&(u_int)imm[i-1]<0x800)
+ current.waswritten|=1<<rs1[i-1];
+ current.waswritten&=~(1<<rt1[i]);
+ current.waswritten&=~(1<<rt2[i]);
+ if((itype[i]==STORE||itype[i]==STORELR||(itype[i]==C2LS&&opcode[i]==0x3a))&&(u_int)imm[i]>=0x800)
+ current.waswritten&=~(1<<rs1[i]);
+
/* Branch post-alloc */
if(i>0)
{
cc=0;
}
#ifdef PCSX
+ else if(itype[i]==C2OP&>e_cycletab[source[i]&0x3f]>2)
+ {
+ // GTE runs in parallel until accessed, divide by 2 for a rough guess
+ cc+=gte_cycletab[source[i]&0x3f]/2;
+ }
else if(/*itype[i]==LOAD||*/itype[i]==STORE||itype[i]==C1LS) // load causes weird timing issues
{
cc+=2; // 2 cycle penalty (after CLOCK_DIVIDER)
}
}
if(current.regmap[HOST_BTREG]==BTREG) current.regmap[HOST_BTREG]=-1;
+ regs[i].waswritten=current.waswritten;
}
/* Pass 4 - Cull unused host registers */