- case FJUMP:
- printf("Jump in the delay slot. This is probably a bug.\n");
- }
- int btaddr=get_reg(regs[0].regmap,BTREG);
- if(btaddr<0) {
- btaddr=get_reg(regs[0].regmap,-1);
- emit_readword((int)&branch_target,btaddr);
- }
- assert(btaddr!=HOST_CCREG);
- if(regs[0].regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
-#ifdef HOST_IMM8
- emit_movimm(start+4,HOST_TEMPREG);
- emit_cmp(btaddr,HOST_TEMPREG);
-#else
- emit_cmpimm(btaddr,start+4);
-#endif
- int branch=(int)out;
- emit_jeq(0);
- store_regs_bt(regs[0].regmap,regs[0].is32,regs[0].dirty,-1);
- emit_jmp(jump_vaddr_reg[btaddr]);
- set_jump_target(branch,(int)out);
- store_regs_bt(regs[0].regmap,regs[0].is32,regs[0].dirty,start+4);
- load_regs_bt(regs[0].regmap,regs[0].is32,regs[0].dirty,start+4);
-}
-
-// Basic liveness analysis for MIPS registers
-void unneeded_registers(int istart,int iend,int r)
-{
- int i;
- uint64_t u,uu,gte_u,b,bu,gte_bu;
- uint64_t temp_u,temp_uu,temp_gte_u=0;
- uint64_t tdep;
- uint64_t gte_u_unknown=0;
- if(new_dynarec_hacks&NDHACK_GTE_UNNEEDED)
- gte_u_unknown=~0ll;
- if(iend==slen-1) {
- u=1;uu=1;
- gte_u=gte_u_unknown;
- }else{
- u=unneeded_reg[iend+1];
- uu=unneeded_reg_upper[iend+1];
- u=1;uu=1;
- gte_u=gte_unneeded[iend+1];
- }
-
- for (i=iend;i>=istart;i--)
- {
- //printf("unneeded registers i=%d (%d,%d) r=%d\n",i,istart,iend,r);
- if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
- {
- // If subroutine call, flag return address as a possible branch target
- if(rt1[i]==31 && i<slen-2) bt[i+2]=1;
-
- if(ba[i]<start || ba[i]>=(start+slen*4))
- {
- // Branch out of this block, flush all regs
- u=1;
- uu=1;
- gte_u=gte_u_unknown;
- /* Hexagon hack
- if(itype[i]==UJUMP&&rt1[i]==31)
- {
- uu=u=0x300C00F; // Discard at, v0-v1, t6-t9
- }
- if(itype[i]==RJUMP&&rs1[i]==31)
- {
- uu=u=0x300C0F3; // Discard at, a0-a3, t6-t9
- }
- if(start>0x80000400&&start<0x80000000+RAM_SIZE) {
- if(itype[i]==UJUMP&&rt1[i]==31)
- {
- //uu=u=0x30300FF0FLL; // Discard at, v0-v1, t0-t9, lo, hi
- uu=u=0x300FF0F; // Discard at, v0-v1, t0-t9
- }
- if(itype[i]==RJUMP&&rs1[i]==31)
- {
- //uu=u=0x30300FFF3LL; // Discard at, a0-a3, t0-t9, lo, hi
- uu=u=0x300FFF3; // Discard at, a0-a3, t0-t9
- }
- }*/
- branch_unneeded_reg[i]=u;
- branch_unneeded_reg_upper[i]=uu;
- // Merge in delay slot
- tdep=(~uu>>rt1[i+1])&1;
- u|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
- uu|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
- u&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
- uu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
- uu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
- u|=1;uu|=1;
- gte_u|=gte_rt[i+1];
- gte_u&=~gte_rs[i+1];
- // If branch is "likely" (and conditional)
- // then we skip the delay slot on the fall-thru path
- if(likely[i]) {
- if(i<slen-1) {
- u&=unneeded_reg[i+2];
- uu&=unneeded_reg_upper[i+2];
- gte_u&=gte_unneeded[i+2];
- }
- else
- {
- u=1;
- uu=1;
- gte_u=gte_u_unknown;
- }
- }
- }
- else
- {
- // Internal branch, flag target
- bt[(ba[i]-start)>>2]=1;
- if(ba[i]<=start+i*4) {
- // Backward branch
- if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000)
- {
- // Unconditional branch
- temp_u=1;temp_uu=1;
- temp_gte_u=0;
- } else {
- // Conditional branch (not taken case)
- temp_u=unneeded_reg[i+2];
- temp_uu=unneeded_reg_upper[i+2];
- temp_gte_u&=gte_unneeded[i+2];
- }
- // Merge in delay slot
- tdep=(~temp_uu>>rt1[i+1])&1;
- temp_u|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
- temp_uu|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
- temp_u&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
- temp_uu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
- temp_uu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
- temp_u|=1;temp_uu|=1;
- temp_gte_u|=gte_rt[i+1];
- temp_gte_u&=~gte_rs[i+1];
- // If branch is "likely" (and conditional)
- // then we skip the delay slot on the fall-thru path
- if(likely[i]) {
- if(i<slen-1) {
- temp_u&=unneeded_reg[i+2];
- temp_uu&=unneeded_reg_upper[i+2];
- temp_gte_u&=gte_unneeded[i+2];
- }
- else
- {
- temp_u=1;
- temp_uu=1;
- temp_gte_u=gte_u_unknown;
- }
- }
- tdep=(~temp_uu>>rt1[i])&1;
- temp_u|=(1LL<<rt1[i])|(1LL<<rt2[i]);
- temp_uu|=(1LL<<rt1[i])|(1LL<<rt2[i]);
- temp_u&=~((1LL<<rs1[i])|(1LL<<rs2[i]));
- temp_uu&=~((1LL<<us1[i])|(1LL<<us2[i]));
- temp_uu&=~((tdep<<dep1[i])|(tdep<<dep2[i]));
- temp_u|=1;temp_uu|=1;
- temp_gte_u|=gte_rt[i];
- temp_gte_u&=~gte_rs[i];
- unneeded_reg[i]=temp_u;
- unneeded_reg_upper[i]=temp_uu;
- gte_unneeded[i]=temp_gte_u;
- // Only go three levels deep. This recursion can take an
- // excessive amount of time if there are a lot of nested loops.
- if(r<2) {
- unneeded_registers((ba[i]-start)>>2,i-1,r+1);
- }else{
- unneeded_reg[(ba[i]-start)>>2]=1;
- unneeded_reg_upper[(ba[i]-start)>>2]=1;
- gte_unneeded[(ba[i]-start)>>2]=gte_u_unknown;
- }
- } /*else*/ if(1) {
- if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000)
- {
- // Unconditional branch
- u=unneeded_reg[(ba[i]-start)>>2];
- uu=unneeded_reg_upper[(ba[i]-start)>>2];
- gte_u=gte_unneeded[(ba[i]-start)>>2];
- branch_unneeded_reg[i]=u;
- branch_unneeded_reg_upper[i]=uu;
- //u=1;
- //uu=1;
- //branch_unneeded_reg[i]=u;
- //branch_unneeded_reg_upper[i]=uu;
- // Merge in delay slot
- tdep=(~uu>>rt1[i+1])&1;
- u|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
- uu|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
- u&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
- uu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
- uu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
- u|=1;uu|=1;
- gte_u|=gte_rt[i+1];
- gte_u&=~gte_rs[i+1];
- } else {
- // Conditional branch
- b=unneeded_reg[(ba[i]-start)>>2];
- bu=unneeded_reg_upper[(ba[i]-start)>>2];
- gte_bu=gte_unneeded[(ba[i]-start)>>2];
- branch_unneeded_reg[i]=b;
- branch_unneeded_reg_upper[i]=bu;
- //b=1;
- //bu=1;
- //branch_unneeded_reg[i]=b;
- //branch_unneeded_reg_upper[i]=bu;
- // Branch delay slot
- tdep=(~uu>>rt1[i+1])&1;
- b|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
- bu|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
- b&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
- bu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
- bu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
- b|=1;bu|=1;
- gte_bu|=gte_rt[i+1];
- gte_bu&=~gte_rs[i+1];
- // If branch is "likely" then we skip the
- // delay slot on the fall-thru path
- if(likely[i]) {
- u=b;
- uu=bu;
- gte_u=gte_bu;
- if(i<slen-1) {
- u&=unneeded_reg[i+2];
- uu&=unneeded_reg_upper[i+2];
- gte_u&=gte_unneeded[i+2];
- //u=1;
- //uu=1;
- }
- } else {
- u&=b;
- uu&=bu;
- gte_u&=gte_bu;
- //u=1;
- //uu=1;
- }
- if(i<slen-1) {
- branch_unneeded_reg[i]&=unneeded_reg[i+2];
- branch_unneeded_reg_upper[i]&=unneeded_reg_upper[i+2];
- //branch_unneeded_reg[i]=1;
- //branch_unneeded_reg_upper[i]=1;
- } else {
- branch_unneeded_reg[i]=1;
- branch_unneeded_reg_upper[i]=1;
- }
- }
- }
- }
- }
- else if(itype[i]==SYSCALL||itype[i]==HLECALL||itype[i]==INTCALL)
- {
- // SYSCALL instruction (software interrupt)
- u=1;
- uu=1;
- }
- else if(itype[i]==COP0 && (source[i]&0x3f)==0x18)
- {
- // ERET instruction (return from interrupt)
- u=1;
- uu=1;
- }
- //u=uu=1; // DEBUG
- tdep=(~uu>>rt1[i])&1;
- // Written registers are unneeded
- u|=1LL<<rt1[i];
- u|=1LL<<rt2[i];
- uu|=1LL<<rt1[i];
- uu|=1LL<<rt2[i];
- gte_u|=gte_rt[i];
- // Accessed registers are needed
- u&=~(1LL<<rs1[i]);
- u&=~(1LL<<rs2[i]);
- uu&=~(1LL<<us1[i]);
- uu&=~(1LL<<us2[i]);
- gte_u&=~gte_rs[i];
- if(gte_rs[i]&&rt1[i]&&(unneeded_reg[i+1]&(1ll<<rt1[i])))
- gte_u|=gte_rs[i] & gte_unneeded[i+1]; // MFC2/CFC2 to dead register, unneeded
- // Source-target dependencies
- uu&=~(tdep<<dep1[i]);
- uu&=~(tdep<<dep2[i]);
- // R0 is always unneeded
- u|=1;uu|=1;
- // Save it
- unneeded_reg[i]=u;
- unneeded_reg_upper[i]=uu;
- gte_unneeded[i]=gte_u;
- /*
- printf("ur (%d,%d) %x: ",istart,iend,start+i*4);
- printf("U:");
- int r;
- for(r=1;r<=CCREG;r++) {
- if((unneeded_reg[i]>>r)&1) {
- if(r==HIREG) printf(" HI");
- else if(r==LOREG) printf(" LO");
- else printf(" r%d",r);
- }
- }
- printf(" UU:");
- for(r=1;r<=CCREG;r++) {
- if(((unneeded_reg_upper[i]&~unneeded_reg[i])>>r)&1) {
- if(r==HIREG) printf(" HI");
- else if(r==LOREG) printf(" LO");
- else printf(" r%d",r);
- }
- }
- printf("\n");*/
- }
-#ifdef FORCE32
- for (i=iend;i>=istart;i--)
- {
- unneeded_reg_upper[i]=branch_unneeded_reg_upper[i]=-1LL;