//if(rs1[i]!=rt1[i]&&needed_again(rs1[i],i)) clear_const(current,rs1[i]); // Does this help or hurt?
if(!rs1[i]) current->u&=~1LL; // Allow allocating r0 if it's the source register
if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
- if(rt1[i]) {
+ if(rt1[i]&&!((current->u>>rt1[i])&1)) {
alloc_reg(current,i,rt1[i]);
- if(get_reg(current->regmap,rt1[i])<0) {
- // dummy load, but we still need a register to calculate the address
- alloc_reg_temp(current,i,-1);
- minimum_free_regs[i]=1;
- }
+ assert(get_reg(current->regmap,rt1[i])>=0);
if(opcode[i]==0x27||opcode[i]==0x37) // LWU/LD
{
current->is32&=~(1LL<<rt1[i]);
}
else
{
- // Load to r0 (dummy load)
+ // Load to r0 or unneeded register (dummy load)
// but we still need a register to calculate the address
if(opcode[i]==0x22||opcode[i]==0x26)
{
alloc_reg(current,i,FTEMP); // LWL/LWR need another temporary
}
+ // If using TLB, need a register for pointer to the mapping table
+ if(using_tlb) alloc_reg(current,i,TLREG);
alloc_reg_temp(current,i,-1);
minimum_free_regs[i]=1;
if(opcode[i]==0x1A||opcode[i]==0x1B) // LDL/LDR
if (opcode[i]==0x21||opcode[i]==0x25) x=2; // LH/LHU
map=get_reg(i_regs->regmap,TLREG);
assert(map>=0);
+ reglist&=~(1<<map);
map=do_tlb_r(addr,tl,map,x,-1,-1,c,constmap[i][s]+offset);
do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
}
if (opcode[i]==0x29) x=2; // SH
map=get_reg(i_regs->regmap,TLREG);
assert(map>=0);
+ reglist&=~(1<<map);
map=do_tlb_w(addr,temp,map,x,c,constmap[i][s]+offset);
do_tlb_w_branch(map,c,constmap[i][s]+offset,&jaddr);
}
}else{ // using tlb
int map=get_reg(i_regs->regmap,TLREG);
assert(map>=0);
+ reglist&=~(1<<map);
map=do_tlb_w(c||s<0||offset?temp:s,temp,map,0,c,constmap[i][s]+offset);
if(!c&&!offset&&s>=0) emit_mov(s,temp);
do_tlb_w_branch(map,c,constmap[i][s]+offset,&jaddr);
{
map=get_reg(i_regs->regmap,TLREG);
assert(map>=0);
+ reglist&=~(1<<map);
if (opcode[i]==0x31||opcode[i]==0x35) { // LWC1/LDC1
map=do_tlb_r(offset||c||s<0?ar:s,ar,map,0,-1,-1,c,constmap[i][s]+offset);
}
emit_zeroreg(hr);
}
else
- if(i_regmap[hr]>0 && i_regmap[hr]!=CCREG)
+ if(i_regmap[hr]>0 && (i_regmap[hr]&63)<TEMPREG && i_regmap[hr]!=CCREG)
{
emit_loadreg(i_regmap[hr],hr);
}
emit_zeroreg(hr);
}
else
- if(i_regmap[hr]>0 && i_regmap[hr]!=CCREG)
+ if(i_regmap[hr]>0 && (i_regmap[hr]&63)<TEMPREG && i_regmap[hr]!=CCREG)
{
emit_loadreg(i_regmap[hr],hr);
}
}
// Load 32-bit regs
for(hr=0;hr<HOST_REGS;hr++) {
- if(regs[t].regmap_entry[hr]>=0&®s[t].regmap_entry[hr]<64) {
+ if(regs[t].regmap_entry[hr]>=0&®s[t].regmap_entry[hr]<TEMPREG) {
if(regs[t].regmap_entry[hr]==0) {
emit_zeroreg(hr);
}
}
// Load 64-bit regs
for(hr=0;hr<HOST_REGS;hr++) {
- if(regs[t].regmap_entry[hr]>=64) {
+ if(regs[t].regmap_entry[hr]>=64&®s[t].regmap_entry[hr]<TEMPREG+64) {
assert(regs[t].regmap_entry[hr]!=64);
if((regs[t].was32>>(regs[t].regmap_entry[hr]&63))&1) {
int lr=get_reg(regs[t].regmap_entry,regs[t].regmap_entry[hr]-64);
}
// Load 32-bit regs
for(hr=0;hr<HOST_REGS;hr++) {
- if(hr!=EXCLUDE_REG&®s[t].regmap_entry[hr]>=0&®s[t].regmap_entry[hr]<64) {
+ if(hr!=EXCLUDE_REG&®s[t].regmap_entry[hr]>=0&®s[t].regmap_entry[hr]<TEMPREG) {
#ifdef DESTRUCTIVE_WRITEBACK
if(i_regmap[hr]!=regs[t].regmap_entry[hr] || ( !((regs[t].dirty>>hr)&1) && ((i_dirty>>hr)&1) && (((i_is32&~unneeded_reg_upper[t])>>i_regmap[hr])&1) ) || (((i_is32&~regs[t].was32&~unneeded_reg_upper[t])>>(i_regmap[hr]&63))&1)) {
#else
}
//Load 64-bit regs
for(hr=0;hr<HOST_REGS;hr++) {
- if(hr!=EXCLUDE_REG&®s[t].regmap_entry[hr]>=64) {
+ if(hr!=EXCLUDE_REG&®s[t].regmap_entry[hr]>=64&®s[t].regmap_entry[hr]<TEMPREG+64) {
if(i_regmap[hr]!=regs[t].regmap_entry[hr]) {
assert(regs[t].regmap_entry[hr]!=64);
if((i_is32>>(regs[t].regmap_entry[hr]&63))&1) {
{
if(i_regmap[hr]!=regs[t].regmap_entry[hr])
{
- if(regs[t].regmap_entry[hr]!=-1)
+ if(regs[t].regmap_entry[hr]>=0&&(regs[t].regmap_entry[hr]|64)<TEMPREG+64)
{
return 0;
}
else
if((i_dirty>>hr)&1)
{
- if(i_regmap[hr]<64)
+ if(i_regmap[hr]<TEMPREG)
{
if(!((unneeded_reg[t]>>i_regmap[hr])&1))
return 0;
}
- else
+ else if(i_regmap[hr]>=64&&i_regmap[hr]<TEMPREG+64)
{
if(!((unneeded_reg_upper[t]>>(i_regmap[hr]&63))&1))
return 0;
regs[i].wasdirty|=will_dirty_i&(1<<r);
}
}
- else if((nr=get_reg(regs[i].regmap,regmap_pre[i][r]))>=0) {
+ else if(regmap_pre[i][r]>=0&&(nr=get_reg(regs[i].regmap,regmap_pre[i][r]))>=0) {
// Register moved to a different register
will_dirty_i&=~(1<<r);
wont_dirty_i&=~(1<<r);
// Does the block continue due to a branch?
for(j=i-1;j>=0;j--)
{
+ if(ba[j]==start+i*4) done=j=0; // Branch into delay slot
if(ba[j]==start+i*4+4) done=j=0;
if(ba[j]==start+i*4+8) done=j=0;
}
wb_valid(regmap_pre[i],regs[i].regmap_entry,dirty_pre,regs[i].wasdirty,is32_pre,
unneeded_reg[i],unneeded_reg_upper[i]);
}
- is32_pre=regs[i].is32;
- dirty_pre=regs[i].dirty;
+ if((itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)&&!likely[i]) {
+ is32_pre=branch_regs[i].is32;
+ dirty_pre=branch_regs[i].dirty;
+ }else{
+ is32_pre=regs[i].is32;
+ dirty_pre=regs[i].dirty;
+ }
#endif
// write back
if(i<2||(itype[i-2]!=UJUMP&&itype[i-2]!=RJUMP&&(source[i-2]>>16)!=0x1000))