- signed char branch_rregmap_i[RRMAP_SIZE];
- u_int branch_hr_candirty = 0;
- make_rregs(branch_regs[i].regmap, branch_rregmap_i, &branch_hr_candirty);
- if(ba[i]<start || ba[i]>=(start+slen*4))
- {
- // Branch out of this block, flush all regs
- will_dirty_i = 0;
- will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt1) & 31);
- will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt2) & 31);
- will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt1) & 31);
- will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt2) & 31);
- will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, CCREG) & 31);
- will_dirty_i &= branch_hr_candirty;
- if (dops[i].is_ujump)
- {
- // Unconditional branch
- wont_dirty_i = 0;
- // Merge in delay slot (will dirty)
- will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt1) & 31);
- will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt2) & 31);
- will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i+1].rt1) & 31);
- will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i+1].rt2) & 31);
- will_dirty_i |= 1u << (get_rreg(rregmap_i, CCREG) & 31);
- will_dirty_i &= hr_candirty;
- }
- else
- {
- // Conditional branch
- wont_dirty_i = wont_dirty_next;
- // Merge in delay slot (will dirty)
- // (the original code had no explanation why these 2 are commented out)
- //will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt1) & 31);
- //will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt2) & 31);
- will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i+1].rt1) & 31);
- will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i+1].rt2) & 31);
- will_dirty_i |= 1u << (get_rreg(rregmap_i, CCREG) & 31);
- will_dirty_i &= hr_candirty;
- }
- // Merge in delay slot (wont dirty)
- wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt1) & 31);
- wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt2) & 31);
- wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i+1].rt1) & 31);
- wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i+1].rt2) & 31);
- wont_dirty_i |= 1u << (get_rreg(rregmap_i, CCREG) & 31);
- wont_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt1) & 31);
- wont_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt2) & 31);
- wont_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt1) & 31);
- wont_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt2) & 31);
- wont_dirty_i |= 1u << (get_rreg(branch_rregmap_i, CCREG) & 31);
- wont_dirty_i &= ~(1u << 31);
- if(wr) {
- #ifndef DESTRUCTIVE_WRITEBACK
- branch_regs[i].dirty&=wont_dirty_i;
- #endif
- branch_regs[i].dirty|=will_dirty_i;
- }
- }
- else
- {
- // Internal branch
- if(ba[i]<=start+i*4) {
- // Backward branch
- if (dops[i].is_ujump)
- {
- // Unconditional branch
- temp_will_dirty=0;
- temp_wont_dirty=0;
- // Merge in delay slot (will dirty)
- temp_will_dirty |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt1) & 31);
- temp_will_dirty |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt2) & 31);
- temp_will_dirty |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt1) & 31);
- temp_will_dirty |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt2) & 31);
- temp_will_dirty |= 1u << (get_rreg(branch_rregmap_i, CCREG) & 31);
- temp_will_dirty &= branch_hr_candirty;
- temp_will_dirty |= 1u << (get_rreg(rregmap_i, dops[i].rt1) & 31);
- temp_will_dirty |= 1u << (get_rreg(rregmap_i, dops[i].rt2) & 31);
- temp_will_dirty |= 1u << (get_rreg(rregmap_i, dops[i+1].rt1) & 31);
- temp_will_dirty |= 1u << (get_rreg(rregmap_i, dops[i+1].rt2) & 31);
- temp_will_dirty |= 1u << (get_rreg(rregmap_i, CCREG) & 31);
- temp_will_dirty &= hr_candirty;
- } else {
- // Conditional branch (not taken case)
- temp_will_dirty=will_dirty_next;
- temp_wont_dirty=wont_dirty_next;
- // Merge in delay slot (will dirty)
- temp_will_dirty |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt1) & 31);
- temp_will_dirty |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt2) & 31);
- temp_will_dirty |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt1) & 31);
- temp_will_dirty |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt2) & 31);
- temp_will_dirty |= 1u << (get_rreg(branch_rregmap_i, CCREG) & 31);
- temp_will_dirty &= branch_hr_candirty;
- //temp_will_dirty |= 1u << (get_rreg(rregmap_i, dops[i].rt1) & 31);
- //temp_will_dirty |= 1u << (get_rreg(rregmap_i, dops[i].rt2) & 31);
- temp_will_dirty |= 1u << (get_rreg(rregmap_i, dops[i+1].rt1) & 31);
- temp_will_dirty |= 1u << (get_rreg(rregmap_i, dops[i+1].rt2) & 31);
- temp_will_dirty |= 1u << (get_rreg(rregmap_i, CCREG) & 31);
- temp_will_dirty &= hr_candirty;
- }
- // Merge in delay slot (wont dirty)
- temp_wont_dirty |= 1u << (get_rreg(rregmap_i, dops[i].rt1) & 31);
- temp_wont_dirty |= 1u << (get_rreg(rregmap_i, dops[i].rt2) & 31);
- temp_wont_dirty |= 1u << (get_rreg(rregmap_i, dops[i+1].rt1) & 31);
- temp_wont_dirty |= 1u << (get_rreg(rregmap_i, dops[i+1].rt2) & 31);
- temp_wont_dirty |= 1u << (get_rreg(rregmap_i, CCREG) & 31);
- temp_wont_dirty |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt1) & 31);
- temp_wont_dirty |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt2) & 31);
- temp_wont_dirty |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt1) & 31);
- temp_wont_dirty |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt2) & 31);
- temp_wont_dirty |= 1u << (get_rreg(branch_rregmap_i, CCREG) & 31);
- temp_wont_dirty &= ~(1u << 31);
- // Deal with changed mappings
- if(i<iend) {
- for(r=0;r<HOST_REGS;r++) {
- if(r!=EXCLUDE_REG) {
- if(regs[i].regmap[r]!=regmap_pre[i][r]) {
- temp_will_dirty&=~(1<<r);
- temp_wont_dirty&=~(1<<r);
- if(regmap_pre[i][r]>0 && regmap_pre[i][r]<34) {
- temp_will_dirty|=((unneeded_reg[i]>>regmap_pre[i][r])&1)<<r;
- temp_wont_dirty|=((unneeded_reg[i]>>regmap_pre[i][r])&1)<<r;
- } else {
- temp_will_dirty|=1<<r;
- temp_wont_dirty|=1<<r;
- }
- }
- }
- }
- }
- if(wr) {
- will_dirty[i]=temp_will_dirty;
- wont_dirty[i]=temp_wont_dirty;
- clean_registers((ba[i]-start)>>2,i-1,0);
- }else{
- // Limit recursion. It can take an excessive amount
- // of time if there are a lot of nested loops.
- will_dirty[(ba[i]-start)>>2]=0;
- wont_dirty[(ba[i]-start)>>2]=-1;
- }
- }
- /*else*/ if(1)
- {
- if (dops[i].is_ujump)
- {
- // Unconditional branch
- will_dirty_i=0;
- wont_dirty_i=0;
- //if(ba[i]>start+i*4) { // Disable recursion (for debugging)
- for(r=0;r<HOST_REGS;r++) {
- if(r!=EXCLUDE_REG) {
- if(branch_regs[i].regmap[r]==regs[(ba[i]-start)>>2].regmap_entry[r]) {
- will_dirty_i|=will_dirty[(ba[i]-start)>>2]&(1<<r);
- wont_dirty_i|=wont_dirty[(ba[i]-start)>>2]&(1<<r);
- }
- if(branch_regs[i].regmap[r]>=0) {
- will_dirty_i|=((unneeded_reg[(ba[i]-start)>>2]>>branch_regs[i].regmap[r])&1)<<r;
- wont_dirty_i|=((unneeded_reg[(ba[i]-start)>>2]>>branch_regs[i].regmap[r])&1)<<r;
- }
- }
- }
- //}
- // Merge in delay slot
- will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt1) & 31);
- will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt2) & 31);
- will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt1) & 31);
- will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt2) & 31);
- will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, CCREG) & 31);
- will_dirty_i &= branch_hr_candirty;
- will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt1) & 31);
- will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt2) & 31);
- will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i+1].rt1) & 31);
- will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i+1].rt2) & 31);
- will_dirty_i |= 1u << (get_rreg(rregmap_i, CCREG) & 31);
- will_dirty_i &= hr_candirty;
- } else {
- // Conditional branch
- will_dirty_i=will_dirty_next;
- wont_dirty_i=wont_dirty_next;
- //if(ba[i]>start+i*4) // Disable recursion (for debugging)
- for(r=0;r<HOST_REGS;r++) {
- if(r!=EXCLUDE_REG) {
- signed char target_reg=branch_regs[i].regmap[r];
- if(target_reg==regs[(ba[i]-start)>>2].regmap_entry[r]) {
- will_dirty_i&=will_dirty[(ba[i]-start)>>2]&(1<<r);
- wont_dirty_i|=wont_dirty[(ba[i]-start)>>2]&(1<<r);
- }
- else if(target_reg>=0) {
- will_dirty_i&=((unneeded_reg[(ba[i]-start)>>2]>>target_reg)&1)<<r;
- wont_dirty_i|=((unneeded_reg[(ba[i]-start)>>2]>>target_reg)&1)<<r;
- }
- }
- }
- // Merge in delay slot
- will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt1) & 31);
- will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt2) & 31);
- will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt1) & 31);
- will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt2) & 31);
- will_dirty_i |= 1u << (get_rreg(branch_rregmap_i, CCREG) & 31);
- will_dirty_i &= branch_hr_candirty;
- //will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt1) & 31);
- //will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt2) & 31);
- will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i+1].rt1) & 31);
- will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i+1].rt2) & 31);
- will_dirty_i |= 1u << (get_rreg(rregmap_i, CCREG) & 31);
- will_dirty_i &= hr_candirty;
- }
- // Merge in delay slot (won't dirty)
- wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt1) & 31);
- wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt2) & 31);
- wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i+1].rt1) & 31);
- wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i+1].rt2) & 31);
- wont_dirty_i |= 1u << (get_rreg(rregmap_i, CCREG) & 31);
- wont_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt1) & 31);
- wont_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i].rt2) & 31);
- wont_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt1) & 31);
- wont_dirty_i |= 1u << (get_rreg(branch_rregmap_i, dops[i+1].rt2) & 31);
- wont_dirty_i |= 1u << (get_rreg(branch_rregmap_i, CCREG) & 31);
- wont_dirty_i &= ~(1u << 31);
- if(wr) {
- #ifndef DESTRUCTIVE_WRITEBACK
- branch_regs[i].dirty&=wont_dirty_i;
- #endif
- branch_regs[i].dirty|=will_dirty_i;
- }
- }
- }
- }
- else if(dops[i].itype==SYSCALL||dops[i].itype==HLECALL||dops[i].itype==INTCALL)
- {
- // SYSCALL instruction (software interrupt)
- will_dirty_i=0;
- wont_dirty_i=0;
- }
- else if(dops[i].itype==COP0 && (source[i]&0x3f)==0x18)
- {
- // ERET instruction (return from interrupt)
- will_dirty_i=0;
- wont_dirty_i=0;
- }
- will_dirty_next=will_dirty_i;
- wont_dirty_next=wont_dirty_i;
- will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt1) & 31);
- will_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt2) & 31);
- will_dirty_i |= 1u << (get_rreg(rregmap_i, CCREG) & 31);
- will_dirty_i &= hr_candirty;
- wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt1) & 31);
- wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i].rt2) & 31);
- wont_dirty_i |= 1u << (get_rreg(rregmap_i, CCREG) & 31);
- wont_dirty_i &= ~(1u << 31);
- if (i > istart && !dops[i].is_jump) {
- // Don't store a register immediately after writing it,
- // may prevent dual-issue.
- wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i-1].rt1) & 31);
- wont_dirty_i |= 1u << (get_rreg(rregmap_i, dops[i-1].rt2) & 31);
- }
- // Save it
- will_dirty[i]=will_dirty_i;
- wont_dirty[i]=wont_dirty_i;
- // Mark registers that won't be dirtied as not dirty
- if(wr) {
- regs[i].dirty|=will_dirty_i;
- #ifndef DESTRUCTIVE_WRITEBACK
- regs[i].dirty&=wont_dirty_i;
- if(dops[i].is_jump)
- {
- if (i < iend-1 && !dops[i].is_ujump) {
- for(r=0;r<HOST_REGS;r++) {
- if(r!=EXCLUDE_REG) {
- if(regs[i].regmap[r]==regmap_pre[i+2][r]) {
- regs[i+2].wasdirty&=wont_dirty_i|~(1<<r);
- }else {/*printf("i: %x (%d) mismatch(+2): %d\n",start+i*4,i,r);assert(!((wont_dirty_i>>r)&1));*/}
- }
- }
- }
- }
- else
- {
- if(i<iend) {
- for(r=0;r<HOST_REGS;r++) {
- if(r!=EXCLUDE_REG) {
- if(regs[i].regmap[r]==regmap_pre[i+1][r]) {
- regs[i+1].wasdirty&=wont_dirty_i|~(1<<r);
- }else {/*printf("i: %x (%d) mismatch(+1): %d\n",start+i*4,i,r);assert(!((wont_dirty_i>>r)&1));*/}
- }
- }
- }
- }
- #endif
- }
- // Deal with changed mappings
- temp_will_dirty=will_dirty_i;
- temp_wont_dirty=wont_dirty_i;
- for(r=0;r<HOST_REGS;r++) {
- if(r!=EXCLUDE_REG) {
- int nr;
- if(regs[i].regmap[r]==regmap_pre[i][r]) {
- if(wr) {
- #ifndef DESTRUCTIVE_WRITEBACK
- regs[i].wasdirty&=wont_dirty_i|~(1<<r);
- #endif
- regs[i].wasdirty|=will_dirty_i&(1<<r);
- }
- }
- else if(regmap_pre[i][r]>=0&&(nr=get_rreg(rregmap_i,regmap_pre[i][r]))>=0) {
- // Register moved to a different register
- will_dirty_i&=~(1<<r);
- wont_dirty_i&=~(1<<r);
- will_dirty_i|=((temp_will_dirty>>nr)&1)<<r;
- wont_dirty_i|=((temp_wont_dirty>>nr)&1)<<r;
- if(wr) {
- #ifndef DESTRUCTIVE_WRITEBACK
- regs[i].wasdirty&=wont_dirty_i|~(1<<r);
- #endif
- regs[i].wasdirty|=will_dirty_i&(1<<r);
- }
- }
- else {
- will_dirty_i&=~(1<<r);
- wont_dirty_i&=~(1<<r);
- if(regmap_pre[i][r]>0 && regmap_pre[i][r]<34) {
- will_dirty_i|=((unneeded_reg[i]>>regmap_pre[i][r])&1)<<r;
- wont_dirty_i|=((unneeded_reg[i]>>regmap_pre[i][r])&1)<<r;
- } else {
- wont_dirty_i|=1<<r;
- /*printf("i: %x (%d) mismatch: %d\n",start+i*4,i,r);assert(!((will_dirty>>r)&1));*/
- }
- }
- }
- }
- }
-}
-
-#ifdef DISASM
-#include <inttypes.h>
-static char insn[MAXBLOCK][10];
-
-#define set_mnemonic(i_, n_) \
- strcpy(insn[i_], n_)
-
-void print_regmap(const char *name, const signed char *regmap)
-{
- char buf[5];
- int i, l;
- fputs(name, stdout);
- for (i = 0; i < HOST_REGS; i++) {
- l = 0;
- if (regmap[i] >= 0)
- l = snprintf(buf, sizeof(buf), "$%d", regmap[i]);
- for (; l < 3; l++)
- buf[l] = ' ';
- buf[l] = 0;
- printf(" r%d=%s", i, buf);
- }
- fputs("\n", stdout);
-}
-
- /* disassembly */
-void disassemble_inst(int i)
-{
- if (dops[i].bt) printf("*"); else printf(" ");
- switch(dops[i].itype) {
- case UJUMP:
- printf (" %x: %s %8x\n",start+i*4,insn[i],ba[i]);break;
- case CJUMP:
- printf (" %x: %s r%d,r%d,%8x\n",start+i*4,insn[i],dops[i].rs1,dops[i].rs2,i?start+i*4+4+((signed int)((unsigned int)source[i]<<16)>>14):*ba);break;
- case SJUMP:
- printf (" %x: %s r%d,%8x\n",start+i*4,insn[i],dops[i].rs1,start+i*4+4+((signed int)((unsigned int)source[i]<<16)>>14));break;
- case RJUMP:
- if (dops[i].opcode==0x9&&dops[i].rt1!=31)
- printf (" %x: %s r%d,r%d\n",start+i*4,insn[i],dops[i].rt1,dops[i].rs1);
- else
- printf (" %x: %s r%d\n",start+i*4,insn[i],dops[i].rs1);
- break;
- case SPAN:
- printf (" %x: %s (pagespan) r%d,r%d,%8x\n",start+i*4,insn[i],dops[i].rs1,dops[i].rs2,ba[i]);break;
- case IMM16:
- if(dops[i].opcode==0xf) //LUI
- printf (" %x: %s r%d,%4x0000\n",start+i*4,insn[i],dops[i].rt1,imm[i]&0xffff);
- else
- printf (" %x: %s r%d,r%d,%d\n",start+i*4,insn[i],dops[i].rt1,dops[i].rs1,imm[i]);
- break;
- case LOAD:
- case LOADLR:
- printf (" %x: %s r%d,r%d+%x\n",start+i*4,insn[i],dops[i].rt1,dops[i].rs1,imm[i]);
- break;
- case STORE:
- case STORELR:
- printf (" %x: %s r%d,r%d+%x\n",start+i*4,insn[i],dops[i].rs2,dops[i].rs1,imm[i]);
- break;
- case ALU:
- case SHIFT:
- printf (" %x: %s r%d,r%d,r%d\n",start+i*4,insn[i],dops[i].rt1,dops[i].rs1,dops[i].rs2);
- break;
- case MULTDIV:
- printf (" %x: %s r%d,r%d\n",start+i*4,insn[i],dops[i].rs1,dops[i].rs2);
- break;
- case SHIFTIMM:
- printf (" %x: %s r%d,r%d,%d\n",start+i*4,insn[i],dops[i].rt1,dops[i].rs1,imm[i]);
- break;
- case MOV:
- if((dops[i].opcode2&0x1d)==0x10)
- printf (" %x: %s r%d\n",start+i*4,insn[i],dops[i].rt1);
- else if((dops[i].opcode2&0x1d)==0x11)
- printf (" %x: %s r%d\n",start+i*4,insn[i],dops[i].rs1);
- else
- printf (" %x: %s\n",start+i*4,insn[i]);
- break;
- case COP0:
- if(dops[i].opcode2==0)
- printf (" %x: %s r%d,cpr0[%d]\n",start+i*4,insn[i],dops[i].rt1,(source[i]>>11)&0x1f); // MFC0
- else if(dops[i].opcode2==4)
- printf (" %x: %s r%d,cpr0[%d]\n",start+i*4,insn[i],dops[i].rs1,(source[i]>>11)&0x1f); // MTC0
- else printf (" %x: %s\n",start+i*4,insn[i]);
- break;
- case COP1:
- if(dops[i].opcode2<3)
- printf (" %x: %s r%d,cpr1[%d]\n",start+i*4,insn[i],dops[i].rt1,(source[i]>>11)&0x1f); // MFC1
- else if(dops[i].opcode2>3)
- printf (" %x: %s r%d,cpr1[%d]\n",start+i*4,insn[i],dops[i].rs1,(source[i]>>11)&0x1f); // MTC1
- else printf (" %x: %s\n",start+i*4,insn[i]);
- break;
- case COP2:
- if(dops[i].opcode2<3)
- printf (" %x: %s r%d,cpr2[%d]\n",start+i*4,insn[i],dops[i].rt1,(source[i]>>11)&0x1f); // MFC2
- else if(dops[i].opcode2>3)
- printf (" %x: %s r%d,cpr2[%d]\n",start+i*4,insn[i],dops[i].rs1,(source[i]>>11)&0x1f); // MTC2
- else printf (" %x: %s\n",start+i*4,insn[i]);
- break;
- case C1LS:
- printf (" %x: %s cpr1[%d],r%d+%x\n",start+i*4,insn[i],(source[i]>>16)&0x1f,dops[i].rs1,imm[i]);
- break;
- case C2LS:
- printf (" %x: %s cpr2[%d],r%d+%x\n",start+i*4,insn[i],(source[i]>>16)&0x1f,dops[i].rs1,imm[i]);
- break;
- case INTCALL:
- printf (" %x: %s (INTCALL)\n",start+i*4,insn[i]);
- break;
- default:
- //printf (" %s %8x\n",insn[i],source[i]);
- printf (" %x: %s\n",start+i*4,insn[i]);
- }
- return;
- printf("D: %"PRIu64" WD: %"PRIu64" U: %"PRIu64"\n",
- regs[i].dirty, regs[i].wasdirty, unneeded_reg[i]);
- print_regmap("pre: ", regmap_pre[i]);
- print_regmap("entry: ", regs[i].regmap_entry);
- print_regmap("map: ", regs[i].regmap);
- if (dops[i].is_jump) {
- print_regmap("bentry:", branch_regs[i].regmap_entry);
- print_regmap("bmap: ", branch_regs[i].regmap);
- }
-}
-#else
// Without DISASM, recording a mnemonic expands to nothing ...
#define set_mnemonic(i_, n_)

// ... and disassembling an instruction is a no-op.
static void disassemble_inst(int i)
{
  (void)i;
}
-#endif // DISASM
-
-#define DRC_TEST_VAL 0x74657374
-
-static void new_dynarec_test(void)
-{
- int (*testfunc)(void);
- void *beginning;
- int ret[2];
- size_t i;
-
- // check structure linkage
- if ((u_char *)rcnts - (u_char *)&psxRegs != sizeof(psxRegs))
- {
- SysPrintf("linkage_arm* miscompilation/breakage detected.\n");
- }
-
- SysPrintf("testing if we can run recompiled code @%p...\n", out);
- ((volatile u_int *)out)[0]++; // make cache dirty
-
- for (i = 0; i < ARRAY_SIZE(ret); i++) {
- out = ndrc->translation_cache;
- beginning = start_block();
- emit_movimm(DRC_TEST_VAL + i, 0); // test
- emit_ret();
- literal_pool(0);
- end_block(beginning);
- testfunc = beginning;
- ret[i] = testfunc();
- }
-
- if (ret[0] == DRC_TEST_VAL && ret[1] == DRC_TEST_VAL + 1)
- SysPrintf("test passed.\n");
- else
- SysPrintf("test failed, will likely crash soon (r=%08x %08x)\n", ret[0], ret[1]);
- out = ndrc->translation_cache;
-}
-
-// clear the state completely, instead of just marking
-// things invalid like invalidate_all_pages() does
-void new_dynarec_clear_full(void)
-{
- int n;
- out = ndrc->translation_cache;
- memset(invalid_code,1,sizeof(invalid_code));
- memset(hash_table,0xff,sizeof(hash_table));
- memset(mini_ht,-1,sizeof(mini_ht));
- memset(restore_candidate,0,sizeof(restore_candidate));
- memset(shadow,0,sizeof(shadow));
- copy=shadow;
- expirep=16384; // Expiry pointer, +2 blocks
- pending_exception=0;
- literalcount=0;
- stop_after_jal=0;
- inv_code_start=inv_code_end=~0;
- hack_addr=0;
- f1_hack=0;
- // TLB
- for(n=0;n<4096;n++) ll_clear(jump_in+n);
- for(n=0;n<4096;n++) ll_clear(jump_out+n);
- for(n=0;n<4096;n++) ll_clear(jump_dirty+n);
-
- cycle_multiplier_old = cycle_multiplier;
- new_dynarec_hacks_old = new_dynarec_hacks;
-}
-
-void new_dynarec_init(void)
-{
- SysPrintf("Init new dynarec, ndrc size %x\n", (int)sizeof(*ndrc));
-
-#ifdef _3DS
- check_rosalina();
-#endif
-#ifdef BASE_ADDR_DYNAMIC
- #ifdef VITA
- sceBlock = getVMBlock(); //sceKernelAllocMemBlockForVM("code", sizeof(*ndrc));
- if (sceBlock <= 0)
- SysPrintf("sceKernelAllocMemBlockForVM failed: %x\n", sceBlock);
- int ret = sceKernelGetMemBlockBase(sceBlock, (void **)&ndrc);
- if (ret < 0)
- SysPrintf("sceKernelGetMemBlockBase failed: %x\n", ret);
- sceKernelOpenVMDomain();
- sceClibPrintf("translation_cache = 0x%08lx\n ", (long)ndrc->translation_cache);
- #elif defined(_MSC_VER)
- ndrc = VirtualAlloc(NULL, sizeof(*ndrc), MEM_COMMIT | MEM_RESERVE,
- PAGE_EXECUTE_READWRITE);
- #else
- uintptr_t desired_addr = 0;
- #ifdef __ELF__
- extern char _end;
- desired_addr = ((uintptr_t)&_end + 0xffffff) & ~0xffffffl;
- #endif
- ndrc = mmap((void *)desired_addr, sizeof(*ndrc),
- PROT_READ | PROT_WRITE | PROT_EXEC,
- MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (ndrc == MAP_FAILED) {
- SysPrintf("mmap() failed: %s\n", strerror(errno));
- abort();
- }
- #endif
-#else
- #ifndef NO_WRITE_EXEC
- // not all systems allow execute in data segment by default
- // size must be 4K aligned for 3DS?
- if (mprotect(ndrc, sizeof(*ndrc),
- PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
- SysPrintf("mprotect() failed: %s\n", strerror(errno));
- #endif
-#endif
- out = ndrc->translation_cache;
- cycle_multiplier=200;
- new_dynarec_clear_full();
-#ifdef HOST_IMM8
- // Copy this into local area so we don't have to put it in every literal pool
- invc_ptr=invalid_code;
-#endif
- arch_init();
- new_dynarec_test();
- ram_offset=(uintptr_t)rdram-0x80000000;
- if (ram_offset!=0)
- SysPrintf("warning: RAM is not directly mapped, performance will suffer\n");
-}
-
-void new_dynarec_cleanup(void)
-{
- int n;
-#ifdef BASE_ADDR_DYNAMIC
- #ifdef VITA
- // sceBlock is managed by retroarch's bootstrap code
- //sceKernelFreeMemBlock(sceBlock);
- //sceBlock = -1;
- #else
- if (munmap(ndrc, sizeof(*ndrc)) < 0)
- SysPrintf("munmap() failed\n");
- #endif
-#endif
- for(n=0;n<4096;n++) ll_clear(jump_in+n);
- for(n=0;n<4096;n++) ll_clear(jump_out+n);
- for(n=0;n<4096;n++) ll_clear(jump_dirty+n);
- #ifdef ROM_COPY
- if (munmap (ROM_COPY, 67108864) < 0) {SysPrintf("munmap() failed\n");}
- #endif
-}
-
-static u_int *get_source_start(u_int addr, u_int *limit)
-{
- if (addr < 0x00200000 ||
- (0xa0000000 <= addr && addr < 0xa0200000))
- {
- // used for BIOS calls mostly?
- *limit = (addr&0xa0000000)|0x00200000;
- return (u_int *)(rdram + (addr&0x1fffff));
- }
- else if (!Config.HLE && (
- /* (0x9fc00000 <= addr && addr < 0x9fc80000) ||*/
- (0xbfc00000 <= addr && addr < 0xbfc80000)))
- {
- // BIOS. The multiplier should be much higher as it's uncached 8bit mem,
- // but timings in PCSX are too tied to the interpreter's BIAS
- if (!HACK_ENABLED(NDHACK_OVERRIDE_CYCLE_M))
- cycle_multiplier_active = 200;
-
- *limit = (addr & 0xfff00000) | 0x80000;
- return (u_int *)((u_char *)psxR + (addr&0x7ffff));
- }
- else if (addr >= 0x80000000 && addr < 0x80000000+RAM_SIZE) {
- *limit = (addr & 0x80600000) + 0x00200000;
- return (u_int *)(rdram + (addr&0x1fffff));
- }
- return NULL;
-}
-
-static u_int scan_for_ret(u_int addr)
-{
- u_int limit = 0;
- u_int *mem;
-
- mem = get_source_start(addr, &limit);
- if (mem == NULL)
- return addr;
-
- if (limit > addr + 0x1000)
- limit = addr + 0x1000;
- for (; addr < limit; addr += 4, mem++) {
- if (*mem == 0x03e00008) // jr $ra
- return addr + 8;
- }
- return addr;
-}
-
/* One compiled-block record as stored in a savestate. */
struct savestate_block {
  uint32_t addr;      // guest address of the block
  uint32_t regflags;  // per-register flag bits saved alongside the block
};

/*
 * qsort() comparator ordering savestate_block records by ascending addr.
 *
 * Returns <0, 0 or >0.  The addresses are compared directly instead of
 * being subtracted: the unsigned difference converted to int has the wrong
 * sign once two addresses are 0x80000000 or more apart (for example
 * 0x00001000 vs 0x80001000), which makes the ordering inconsistent.
 */
static int addr_cmp(const void *p1_, const void *p2_)
{
  const struct savestate_block *p1 = p1_, *p2 = p2_;
  return (p1->addr > p2->addr) - (p1->addr < p2->addr);
}
-
-int new_dynarec_save_blocks(void *save, int size)
-{
- struct savestate_block *blocks = save;
- int maxcount = size / sizeof(blocks[0]);
- struct savestate_block tmp_blocks[1024];
- struct ll_entry *head;
- int p, s, d, o, bcnt;
- u_int addr;
-
- o = 0;
- for (p = 0; p < ARRAY_SIZE(jump_in); p++) {
- bcnt = 0;
- for (head = jump_in[p]; head != NULL; head = head->next) {
- tmp_blocks[bcnt].addr = head->vaddr;
- tmp_blocks[bcnt].regflags = head->reg_sv_flags;
- bcnt++;
- }
- if (bcnt < 1)
- continue;
- qsort(tmp_blocks, bcnt, sizeof(tmp_blocks[0]), addr_cmp);
-
- addr = tmp_blocks[0].addr;
- for (s = d = 0; s < bcnt; s++) {
- if (tmp_blocks[s].addr < addr)
- continue;
- if (d == 0 || tmp_blocks[d-1].addr != tmp_blocks[s].addr)
- tmp_blocks[d++] = tmp_blocks[s];
- addr = scan_for_ret(tmp_blocks[s].addr);
- }
-
- if (o + d > maxcount)
- d = maxcount - o;
- memcpy(&blocks[o], tmp_blocks, d * sizeof(blocks[0]));
- o += d;
- }
-
- return o * sizeof(blocks[0]);
-}
-
-void new_dynarec_load_blocks(const void *save, int size)
-{
- const struct savestate_block *blocks = save;
- int count = size / sizeof(blocks[0]);
- u_int regs_save[32];
- uint32_t f;
- int i, b;
-
- get_addr(psxRegs.pc);
-
- // change GPRs for speculation to at least partially work..
- memcpy(regs_save, &psxRegs.GPR, sizeof(regs_save));
- for (i = 1; i < 32; i++)
- psxRegs.GPR.r[i] = 0x80000000;
-
- for (b = 0; b < count; b++) {
- for (f = blocks[b].regflags, i = 0; f; f >>= 1, i++) {
- if (f & 1)
- psxRegs.GPR.r[i] = 0x1f800000;
- }
-
- get_addr(blocks[b].addr);
-
- for (f = blocks[b].regflags, i = 0; f; f >>= 1, i++) {
- if (f & 1)
- psxRegs.GPR.r[i] = 0x80000000;
- }
- }
-
- memcpy(&psxRegs.GPR, regs_save, sizeof(regs_save));
-}
-
-static int apply_hacks(void)
-{
- int i;
- if (HACK_ENABLED(NDHACK_NO_COMPAT_HACKS))
- return 0;
- /* special hack(s) */
- for (i = 0; i < slen - 4; i++)
- {
- // lui a4, 0xf200; jal <rcnt_read>; addu a0, 2; slti v0, 28224
- if (source[i] == 0x3c04f200 && dops[i+1].itype == UJUMP
- && source[i+2] == 0x34840002 && dops[i+3].opcode == 0x0a
- && imm[i+3] == 0x6e40 && dops[i+3].rs1 == 2)
- {
- SysPrintf("PE2 hack @%08x\n", start + (i+3)*4);
- dops[i + 3].itype = NOP;
- }
- }
- i = slen;
- if (i > 10 && source[i-1] == 0 && source[i-2] == 0x03e00008
- && source[i-4] == 0x8fbf0018 && source[i-6] == 0x00c0f809
- && dops[i-7].itype == STORE)
- {
- i = i-8;
- if (dops[i].itype == IMM16)
- i--;
- // swl r2, 15(r6); swr r2, 12(r6); sw r6, *; jalr r6
- if (dops[i].itype == STORELR && dops[i].rs1 == 6
- && dops[i-1].itype == STORELR && dops[i-1].rs1 == 6)
- {
- SysPrintf("F1 hack from %08x, old dst %08x\n", start, hack_addr);
- f1_hack = 1;
- return 1;
- }
- }
- return 0;
-}
-
-int new_recompile_block(u_int addr)
-{
- u_int pagelimit = 0;
- u_int state_rflags = 0;
- int i;
-
- assem_debug("NOTCOMPILED: addr = %x -> %p\n", addr, out);
- //printf("TRACE: count=%d next=%d (compile %x)\n",Count,next_interupt,addr);
- //if(debug)
- //printf("fpu mapping=%x enabled=%x\n",(Status & 0x04000000)>>26,(Status & 0x20000000)>>29);
-
- // this is just for speculation
- for (i = 1; i < 32; i++) {
- if ((psxRegs.GPR.r[i] & 0xffff0000) == 0x1f800000)
- state_rflags |= 1 << i;
- }
-
- start = (u_int)addr&~3;
- //assert(((u_int)addr&1)==0); // start-in-delay-slot flag
- new_dynarec_did_compile=1;
- if (Config.HLE && start == 0x80001000) // hlecall
- {
- // XXX: is this enough? Maybe check hleSoftCall?
- void *beginning=start_block();
- u_int page=get_page(start);
-
- invalid_code[start>>12]=0;
- emit_movimm(start,0);
- emit_writeword(0,&pcaddr);
- emit_far_jump(new_dyna_leave);
- literal_pool(0);
- end_block(beginning);
- ll_add_flags(jump_in+page,start,state_rflags,(void *)beginning);
- return 0;
- }
- else if (f1_hack && hack_addr == 0) {
- void *beginning = start_block();
- u_int page = get_page(start);
- emit_movimm(start, 0);
- emit_writeword(0, &hack_addr);
- emit_readword(&psxRegs.GPR.n.sp, 0);
- emit_readptr(&mem_rtab, 1);
- emit_shrimm(0, 12, 2);
- emit_readptr_dualindexedx_ptrlen(1, 2, 1);
- emit_addimm(0, 0x18, 0);
- emit_adds_ptr(1, 1, 1);
- emit_ldr_dualindexed(1, 0, 0);
- emit_writeword(0, &psxRegs.GPR.r[26]); // lw k0, 0x18(sp)
- emit_far_call(get_addr_ht);
- emit_jmpreg(0); // jr k0
- literal_pool(0);
- end_block(beginning);
-
- ll_add_flags(jump_in + page, start, state_rflags, beginning);
- SysPrintf("F1 hack to %08x\n", start);
- return 0;