+}
+
+/* Write back dirty registers as soon as we will no longer modify them,
+ * so that we don't end up with lots of writes at the branches.
+ *
+ * Scans instructions iend down to istart (both inclusive) and tracks two
+ * bitmasks, one bit per host register r (bit 1<<r):
+ *   will_dirty - bits that get OR-ed into the dirty/wasdirty state
+ *   wont_dirty - bits that are kept; a clear bit removes the register from
+ *                the dirty/wasdirty state (skipped under DESTRUCTIVE_WRITEBACK)
+ * The masks computed for instruction i are stored in the static arrays
+ * will_dirty[i] / wont_dirty[i] so that branches can look up their target.
+ *
+ * istart, iend: instruction index range to process.
+ */
+// wr: nonzero = also update regs[] / branch_regs[]; zero = only fill the mask arrays (used by the recursive call for backward branches).
+static noinline void pass6_clean_registers(int istart, int iend, int wr)
+{
+ static u_int wont_dirty[MAXBLOCK]; // per-instruction masks; static, so values persist across calls and across the recursion below
+ static u_int will_dirty[MAXBLOCK];
+ int i;
+ int r;
+ u_int will_dirty_i,will_dirty_next,temp_will_dirty; // _i: running mask; _next: snapshot from the previously processed (later) instruction; temp_: scratch
+ u_int wont_dirty_i,wont_dirty_next,temp_wont_dirty;
+ if(iend==slen-1) { // range ends at the last instruction of the block: start from empty masks
+ will_dirty_i=will_dirty_next=0;
+ wont_dirty_i=wont_dirty_next=0;
+ }else{ // otherwise continue from the masks already stored for the instruction following iend
+ will_dirty_i=will_dirty_next=will_dirty[iend+1];
+ wont_dirty_i=wont_dirty_next=wont_dirty[iend+1];
+ }
+ for (i=iend;i>=istart;i--) // backward scan, last instruction first
+ {
+ signed char rregmap_i[RRMAP_SIZE]; // filled by make_rregs(), queried via get_rreg(); presumably a guest-reg -> host-reg reverse map of regs[i].regmap - TODO confirm
+ u_int hr_candirty = 0; // set by make_rregs(); used below to restrict will_dirty bits
+ assert(HOST_REGS < 32); // bit 31 of the 32-bit masks must not be a real host register, it is used as a throw-away bit (see "& 31")
+ make_rregs(regs[i].regmap, rregmap_i, &hr_candirty);
+ __builtin_prefetch(regs[i-1].regmap); // warm the cache for the next iteration; NOTE(review): forms the address of regs[-1] when i==0 (prefetch only)
+ if(dops[i].is_jump)
+ {
+ signed char branch_rregmap_i[RRMAP_SIZE]; // same as rregmap_i, but built from branch_regs[i].regmap
+ u_int branch_hr_candirty = 0;
+ make_rregs(branch_regs[i].regmap, branch_rregmap_i, &branch_hr_candirty);
+ if(ba[i]<start || ba[i]>=(start+slen*4)) // target address lies outside [start, start+slen*4)
+ {
+ // Branch out of this block, flush all regs
+ will_dirty_i = 0;
+ will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt1) & 31); // get_rreg() can be negative (see the >=0 test near the end); "& 31" keeps the shift in range - presumably -1 -> bit 31, TODO confirm
+ will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt2) & 31);
+ will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt1) & 31); // dops[i+1] is the delay slot instruction
+ will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt2) & 31);
+ will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, CCREG) & 31);
+ will_dirty_i &= branch_hr_candirty;
+ if (dops[i].is_ujump)
+ {
+ // Unconditional branch
+ wont_dirty_i = 0;
+ // Merge in delay slot (will dirty)
+ will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt1) & 31);
+ will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt2) & 31);
+ will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i+1].rt1) & 31);
+ will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i+1].rt2) & 31);
+ will_dirty_i |= 1u << (get_rreg(rregmap_i, CCREG) & 31);
+ will_dirty_i &= hr_candirty;
+ }
+ else
+ {
+ // Conditional branch
+ wont_dirty_i = wont_dirty_next; // not-taken path continues in this block, so start from the snapshot of the following code
+ // Merge in delay slot (will dirty)
+ // (the original code had no explanation why these 2 are commented out)
+ //will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt1) & 31);
+ //will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt2) & 31);
+ will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i+1].rt1) & 31);
+ will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i+1].rt2) & 31);
+ will_dirty_i |= 1u << (get_rreg(rregmap_i, CCREG) & 31);
+ will_dirty_i &= hr_candirty;
+ }
+ // Merge in delay slot (wont dirty)
+ wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt1) & 31);
+ wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt2) & 31);
+ wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i+1].rt1) & 31);
+ wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i+1].rt2) & 31);
+ wont_dirty_i |= 1u << (get_rreg(rregmap_i, CCREG) & 31);
+ wont_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt1) & 31);
+ wont_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt2) & 31);
+ wont_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt1) & 31);
+ wont_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt2) & 31);
+ wont_dirty_i |= 1u << (get_rreg(branch_rregmap_i, CCREG) & 31);
+ wont_dirty_i &= ~(1u << 31); // drop the throw-away bit
+ if(wr) { // apply to the post-branch register state: clear first, then force the will_dirty bits
+ #ifndef DESTRUCTIVE_WRITEBACK
+ branch_regs[i].dirty&=wont_dirty_i;
+ #endif
+ branch_regs[i].dirty|=will_dirty_i;
+ }
+ }
+ else
+ {
+ // Internal branch
+ if(ba[i]<=start+i*4) { // target address is at or before this instruction
+ // Backward branch
+ if (dops[i].is_ujump)
+ {
+ // Unconditional branch
+ temp_will_dirty=0;
+ temp_wont_dirty=0;
+ // Merge in delay slot (will dirty)
+ temp_will_dirty |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt1) & 31);
+ temp_will_dirty |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt2) & 31);
+ temp_will_dirty |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt1) & 31);
+ temp_will_dirty |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt2) & 31);
+ temp_will_dirty |= 1u << (get_rreg(branch_rregmap_i, CCREG) & 31);
+ temp_will_dirty &= branch_hr_candirty;
+ temp_will_dirty |= 1u << (get_rreg(rregmap_i, dops[i].rt1) & 31);
+ temp_will_dirty |= 1u << (get_rreg(rregmap_i, dops[i].rt2) & 31);
+ temp_will_dirty |= 1u << (get_rreg(rregmap_i, dops[i+1].rt1) & 31);
+ temp_will_dirty |= 1u << (get_rreg(rregmap_i, dops[i+1].rt2) & 31);
+ temp_will_dirty |= 1u << (get_rreg(rregmap_i, CCREG) & 31);
+ temp_will_dirty &= hr_candirty;
+ } else {
+ // Conditional branch (not taken case)
+ temp_will_dirty=will_dirty_next;
+ temp_wont_dirty=wont_dirty_next;
+ // Merge in delay slot (will dirty)
+ temp_will_dirty |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt1) & 31);
+ temp_will_dirty |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt2) & 31);
+ temp_will_dirty |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt1) & 31);
+ temp_will_dirty |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt2) & 31);
+ temp_will_dirty |= 1u << (get_rreg(branch_rregmap_i, CCREG) & 31);
+ temp_will_dirty &= branch_hr_candirty;
+ //temp_will_dirty |= 1u << (get_rreg(rregmap_i, dops[i].rt1) & 31);
+ //temp_will_dirty |= 1u << (get_rreg(rregmap_i, dops[i].rt2) & 31);
+ temp_will_dirty |= 1u << (get_rreg(rregmap_i, dops[i+1].rt1) & 31);
+ temp_will_dirty |= 1u << (get_rreg(rregmap_i, dops[i+1].rt2) & 31);
+ temp_will_dirty |= 1u << (get_rreg(rregmap_i, CCREG) & 31);
+ temp_will_dirty &= hr_candirty;
+ }
+ // Merge in delay slot (wont dirty)
+ temp_wont_dirty |= 1u << (get_rreg(rregmap_i, dops[i].rt1) & 31);
+ temp_wont_dirty |= 1u << (get_rreg(rregmap_i, dops[i].rt2) & 31);
+ temp_wont_dirty |= 1u << (get_rreg(rregmap_i, dops[i+1].rt1) & 31);
+ temp_wont_dirty |= 1u << (get_rreg(rregmap_i, dops[i+1].rt2) & 31);
+ temp_wont_dirty |= 1u << (get_rreg(rregmap_i, CCREG) & 31);
+ temp_wont_dirty |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt1) & 31);
+ temp_wont_dirty |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt2) & 31);
+ temp_wont_dirty |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt1) & 31);
+ temp_wont_dirty |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt2) & 31);
+ temp_wont_dirty |= 1u << (get_rreg(branch_rregmap_i, CCREG) & 31);
+ temp_wont_dirty &= ~(1u << 31); // drop the throw-away bit
+ // Deal with changed mappings: for host regs whose mapping differs between regmap_pre[i] and regs[i].regmap, recompute the temp bits
+ if(i<iend) {
+ for(r=0;r<HOST_REGS;r++) {
+ if(r!=EXCLUDE_REG) {
+ if(regs[i].regmap[r]!=regmap_pre[i][r]) {
+ temp_will_dirty&=~(1<<r);
+ temp_wont_dirty&=~(1<<r);
+ if(regmap_pre[i][r]>0 && regmap_pre[i][r]<34) { // presumably the guest register numbers that unneeded_reg tracks - TODO confirm the 1..33 range
+ temp_will_dirty|=((unneeded_reg[i]>>regmap_pre[i][r])&1)<<r; // bit r follows the "unneeded" flag of the guest reg previously held in r
+ temp_wont_dirty|=((unneeded_reg[i]>>regmap_pre[i][r])&1)<<r;
+ } else {
+ temp_will_dirty|=1<<r;
+ temp_wont_dirty|=1<<r;
+ }
+ }
+ }
+ }
+ }
+ if(wr) {
+ will_dirty[i]=temp_will_dirty; // provisional masks for this branch, visible to the recursive scan below
+ wont_dirty[i]=temp_wont_dirty;
+ pass6_clean_registers((ba[i]-start)>>2,i-1,0); // scan the loop body (branch target .. i-1) with wr=0 to fill the mask arrays at the target
+ }else{
+ // Limit recursion. It can take an excessive amount
+ // of time if there are a lot of nested loops.
+ will_dirty[(ba[i]-start)>>2]=0; // neutral values for the target: OR-ing 0 adds nothing ...
+ wont_dirty[(ba[i]-start)>>2]=-1; // ... and an all-ones mask clears nothing
+ }
+ }
+ /*else*/ if(1) // always taken: runs for backward as well as forward internal branches
+ {
+ if (dops[i].is_ujump)
+ {
+ // Unconditional branch
+ will_dirty_i=0; // nothing falls through, so only the target's masks matter
+ wont_dirty_i=0;
+ //if(ba[i]>start+i*4) { // Disable recursion (for debugging)
+ for(r=0;r<HOST_REGS;r++) {
+ if(r!=EXCLUDE_REG) {
+ if(branch_regs[i].regmap[r]==regs[(ba[i]-start)>>2].regmap_entry[r]) { // host reg r holds the same thing after the branch and on entry to the target
+ will_dirty_i|=will_dirty[(ba[i]-start)>>2]&(1<<r); // take over the target's bit r
+ wont_dirty_i|=wont_dirty[(ba[i]-start)>>2]&(1<<r);
+ }
+ if(branch_regs[i].regmap[r]>=0) {
+ will_dirty_i|=((unneeded_reg[(ba[i]-start)>>2]>>branch_regs[i].regmap[r])&1)<<r; // also set bit r if the guest reg in r is flagged in unneeded_reg at the target
+ wont_dirty_i|=((unneeded_reg[(ba[i]-start)>>2]>>branch_regs[i].regmap[r])&1)<<r;
+ }
+ }
+ }
+ //}
+ // Merge in delay slot
+ will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt1) & 31);
+ will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt2) & 31);
+ will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt1) & 31);
+ will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt2) & 31);
+ will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, CCREG) & 31);
+ will_dirty_i &= branch_hr_candirty;
+ will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt1) & 31);
+ will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt2) & 31);
+ will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i+1].rt1) & 31);
+ will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i+1].rt2) & 31);
+ will_dirty_i |= 1u << (get_rreg(rregmap_i, CCREG) & 31);
+ will_dirty_i &= hr_candirty;
+ } else {
+ // Conditional branch
+ will_dirty_i=will_dirty_next; // start from the fall-through (not taken) state ...
+ wont_dirty_i=wont_dirty_next; // ... and combine it with the target's state below
+ //if(ba[i]>start+i*4) // Disable recursion (for debugging)
+ for(r=0;r<HOST_REGS;r++) {
+ if(r!=EXCLUDE_REG) {
+ signed char target_reg=branch_regs[i].regmap[r];
+ if(target_reg==regs[(ba[i]-start)>>2].regmap_entry[r]) {
+ will_dirty_i&=will_dirty[(ba[i]-start)>>2]&(1<<r); // NOTE(review): the right-hand side has at most bit r set, so this &= also clears every other bit of will_dirty_i - confirm intended
+ wont_dirty_i|=wont_dirty[(ba[i]-start)>>2]&(1<<r);
+ }
+ else if(target_reg>=0) {
+ will_dirty_i&=((unneeded_reg[(ba[i]-start)>>2]>>target_reg)&1)<<r; // NOTE(review): same single-bit mask effect as above
+ wont_dirty_i|=((unneeded_reg[(ba[i]-start)>>2]>>target_reg)&1)<<r;
+ }
+ }
+ }
+ // Merge in delay slot
+ will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt1) & 31);
+ will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt2) & 31);
+ will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt1) & 31);
+ will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt2) & 31);
+ will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, CCREG) & 31);
+ will_dirty_i &= branch_hr_candirty;
+ //will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt1) & 31);
+ //will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt2) & 31);
+ will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i+1].rt1) & 31);
+ will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i+1].rt2) & 31);
+ will_dirty_i |= 1u << (get_rreg(rregmap_i, CCREG) & 31);
+ will_dirty_i &= hr_candirty;
+ }
+ // Merge in delay slot (won't dirty)
+ wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt1) & 31);
+ wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt2) & 31);
+ wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i+1].rt1) & 31);
+ wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i+1].rt2) & 31);
+ wont_dirty_i |= 1u << (get_rreg(rregmap_i, CCREG) & 31);
+ wont_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt1) & 31);
+ wont_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt2) & 31);
+ wont_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt1) & 31);
+ wont_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt2) & 31);
+ wont_dirty_i |= 1u << (get_rreg(branch_rregmap_i, CCREG) & 31);
+ wont_dirty_i &= ~(1u << 31); // drop the throw-away bit
+ if(wr) { // apply to the post-branch register state: clear first, then force the will_dirty bits
+ #ifndef DESTRUCTIVE_WRITEBACK
+ branch_regs[i].dirty&=wont_dirty_i;
+ #endif
+ branch_regs[i].dirty|=will_dirty_i;
+ }
+ }
+ }
+ }
+ else if(dops[i].itype==SYSCALL||dops[i].itype==HLECALL||dops[i].itype==INTCALL)
+ {
+ // SYSCALL instruction (software interrupt): restart with empty masks
+ will_dirty_i=0;
+ wont_dirty_i=0;
+ }
+ else if(dops[i].itype==COP0 && (source[i]&0x3f)==0x18) // COP0 opcode whose low 6 bits are 0x18
+ {
+ // ERET instruction (return from interrupt): restart with empty masks
+ will_dirty_i=0;
+ wont_dirty_i=0;
+ }
+ will_dirty_next=will_dirty_i; // snapshot before this instruction's own destinations are merged; read by the conditional-branch paths of the next iteration
+ wont_dirty_next=wont_dirty_i;
+ will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt1) & 31); // merge this instruction's destination registers and the cycle counter register
+ will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt2) & 31);
+ will_dirty_i |= 1u << (get_rreg(rregmap_i, CCREG) & 31);
+ will_dirty_i &= hr_candirty;
+ wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt1) & 31);
+ wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt2) & 31);
+ wont_dirty_i |= 1u << (get_rreg(rregmap_i, CCREG) & 31);
+ wont_dirty_i &= ~(1u << 31); // drop the throw-away bit
+ if (i > istart && !dops[i].is_jump) {
+ // Don't store a register immediately after writing it,
+ // may prevent dual-issue.
+ wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i-1].rt1) & 31); // keep the previous instruction's destinations; bit 31 may get set again here and is not re-masked
+ wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i-1].rt2) & 31);
+ }
+ // Save it
+ will_dirty[i]=will_dirty_i;
+ wont_dirty[i]=wont_dirty_i;
+ // Mark registers that won't be dirtied as not dirty
+ if(wr) {
+ regs[i].dirty|=will_dirty_i; // here the order is: force will_dirty bits first, then clear what wont_dirty does not keep
+ #ifndef DESTRUCTIVE_WRITEBACK
+ regs[i].dirty&=wont_dirty_i;
+ if(dops[i].is_jump)
+ {
+ if (i < iend-1 && !dops[i].is_ujump) { // conditional branch: the instruction after the delay slot is i+2
+ for(r=0;r<HOST_REGS;r++) {
+ if(r!=EXCLUDE_REG) {
+ if(regs[i].regmap[r]==regmap_pre[i+2][r]) { // mapping of r unchanged up to i+2
+ regs[i+2].wasdirty&=wont_dirty_i|~(1<<r); // clears bit r of wasdirty only when wont_dirty_i has bit r clear
+ }else {/*printf("i: %x (%d) mismatch(+2): %d\n",start+i*4,i,r);assert(!((wont_dirty_i>>r)&1));*/}
+ }
+ }
+ }
+ }
+ else
+ {
+ if(i<iend) {
+ for(r=0;r<HOST_REGS;r++) {
+ if(r!=EXCLUDE_REG) {
+ if(regs[i].regmap[r]==regmap_pre[i+1][r]) { // mapping of r unchanged into the next instruction
+ regs[i+1].wasdirty&=wont_dirty_i|~(1<<r); // clears bit r of wasdirty only when wont_dirty_i has bit r clear
+ }else {/*printf("i: %x (%d) mismatch(+1): %d\n",start+i*4,i,r);assert(!((wont_dirty_i>>r)&1));*/}
+ }
+ }
+ }
+ }
+ #endif
+ }
+ // Deal with changed mappings: convert the masks from the regs[i].regmap layout to the regmap_pre[i] layout for the next iteration
+ temp_will_dirty=will_dirty_i; // unmodified copies, so moved bits are read from the pre-remap values
+ temp_wont_dirty=wont_dirty_i;
+ for(r=0;r<HOST_REGS;r++) {
+ if(r!=EXCLUDE_REG) {
+ int nr;
+ if(regs[i].regmap[r]==regmap_pre[i][r]) { // mapping of r unchanged by this instruction
+ if(wr) {
+ #ifndef DESTRUCTIVE_WRITEBACK
+ regs[i].wasdirty&=wont_dirty_i|~(1<<r);
+ #endif
+ regs[i].wasdirty|=will_dirty_i&(1<<r);
+ }
+ }
+ else if(regmap_pre[i][r]>=0&&(nr=get_rreg(rregmap_i,regmap_pre[i][r]))>=0) { // what r held before is now found in host reg nr
+ // Register moved to a different register
+ will_dirty_i&=~(1<<r);
+ wont_dirty_i&=~(1<<r);
+ will_dirty_i|=((temp_will_dirty>>nr)&1)<<r; // bit r takes the value that bit nr had before remapping
+ wont_dirty_i|=((temp_wont_dirty>>nr)&1)<<r;
+ if(wr) {
+ #ifndef DESTRUCTIVE_WRITEBACK
+ regs[i].wasdirty&=wont_dirty_i|~(1<<r);
+ #endif
+ regs[i].wasdirty|=will_dirty_i&(1<<r);
+ }
+ }
+ else { // previous content of r is no longer found in any host register (or r was unmapped)
+ will_dirty_i&=~(1<<r);
+ wont_dirty_i&=~(1<<r);
+ if(regmap_pre[i][r]>0 && regmap_pre[i][r]<34) { // presumably the guest register numbers that unneeded_reg tracks - TODO confirm the 1..33 range
+ will_dirty_i|=((unneeded_reg[i]>>regmap_pre[i][r])&1)<<r;
+ wont_dirty_i|=((unneeded_reg[i]>>regmap_pre[i][r])&1)<<r;
+ } else {
+ wont_dirty_i|=1<<r; // only wont_dirty is set here (the backward-branch remap above sets both)
+ /*printf("i: %x (%d) mismatch: %d\n",start+i*4,i,r);assert(!((will_dirty>>r)&1));*/
+ }
+ }
+ }
+ }
+ }
+}