- // branch not taken
- if(1) { // <- FIXME (don't need this)
- set_jump_target(nottaken,(int)out);
- assem_debug("1:\n");
- if(!likely[i]) {
- wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,
- ds_unneeded,ds_unneeded_upper);
- load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i+1],rs2[i+1]);
- address_generation(i+1,&branch_regs[i],0);
- load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG);
- ds_assemble(i+1,&branch_regs[i]);
- }
- cc=get_reg(branch_regs[i].regmap,CCREG);
- if(cc==-1&&!likely[i]) {
- // Cycle count isn't in a register, temporarily load it then write it out
- emit_loadreg(CCREG,HOST_CCREG);
- emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),HOST_CCREG);
- int jaddr=(int)out;
- emit_jns(0);
- add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0);
- emit_storereg(CCREG,HOST_CCREG);
- }
- else{
- cc=get_reg(i_regmap,CCREG);
- assert(cc==HOST_CCREG);
- emit_addimm_and_set_flags(CLOCK_DIVIDER*(ccadj[i]+2),cc);
- int jaddr=(int)out;
- emit_jns(0);
- add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,likely[i]?NULLDS:NOTTAKEN,0);
- }
- }
- }
-}
-
-static void pagespan_assemble(int i,struct regstat *i_regs)
-{
- int s1l=get_reg(i_regs->regmap,rs1[i]);
- int s1h=get_reg(i_regs->regmap,rs1[i]|64);
- int s2l=get_reg(i_regs->regmap,rs2[i]);
- int s2h=get_reg(i_regs->regmap,rs2[i]|64);
- void *nt_branch=NULL;
- int taken=0;
- int nottaken=0;
- int unconditional=0;
- if(rs1[i]==0)
- {
- s1l=s2l;s1h=s2h;
- s2l=s2h=-1;
- }
- else if(rs2[i]==0)
- {
- s2l=s2h=-1;
- }
- if((i_regs->is32>>rs1[i])&(i_regs->is32>>rs2[i])&1) {
- s1h=s2h=-1;
- }
- int hr=0;
- int addr,alt,ntaddr;
- if(i_regs->regmap[HOST_BTREG]<0) {addr=HOST_BTREG;}
- else {
- while(hr<HOST_REGS)
- {
- if(hr!=EXCLUDE_REG && hr!=HOST_CCREG &&
- (i_regs->regmap[hr]&63)!=rs1[i] &&
- (i_regs->regmap[hr]&63)!=rs2[i] )
- {
- addr=hr++;break;
- }
- hr++;
- }
- }
- while(hr<HOST_REGS)
- {
- if(hr!=EXCLUDE_REG && hr!=HOST_CCREG && hr!=HOST_BTREG &&
- (i_regs->regmap[hr]&63)!=rs1[i] &&
- (i_regs->regmap[hr]&63)!=rs2[i] )
- {
- alt=hr++;break;
- }
- hr++;
- }
- if((opcode[i]&0x2E)==6) // BLEZ/BGTZ needs another register
- {
- while(hr<HOST_REGS)
- {
- if(hr!=EXCLUDE_REG && hr!=HOST_CCREG && hr!=HOST_BTREG &&
- (i_regs->regmap[hr]&63)!=rs1[i] &&
- (i_regs->regmap[hr]&63)!=rs2[i] )
- {
- ntaddr=hr;break;
- }
- hr++;
- }
- }
- assert(hr<HOST_REGS);
- if((opcode[i]&0x2e)==4||opcode[i]==0x11) { // BEQ/BNE/BEQL/BNEL/BC1
- load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,CCREG,CCREG);
- }
- emit_addimm(HOST_CCREG,CLOCK_DIVIDER*(ccadj[i]+2),HOST_CCREG);
- if(opcode[i]==2) // J
- {
- unconditional=1;
- }
- if(opcode[i]==3) // JAL
- {
- // TODO: mini_ht
- int rt=get_reg(i_regs->regmap,31);
- emit_movimm(start+i*4+8,rt);
- unconditional=1;
- }
- if(opcode[i]==0&&(opcode2[i]&0x3E)==8) // JR/JALR
- {
- emit_mov(s1l,addr);
- if(opcode2[i]==9) // JALR
- {
- int rt=get_reg(i_regs->regmap,rt1[i]);
- emit_movimm(start+i*4+8,rt);
- }
- }
- if((opcode[i]&0x3f)==4) // BEQ
- {
- if(rs1[i]==rs2[i])
- {
- unconditional=1;
- }
- else
- #ifdef HAVE_CMOV_IMM
- if(s1h<0) {
- if(s2l>=0) emit_cmp(s1l,s2l);
- else emit_test(s1l,s1l);
- emit_cmov2imm_e_ne_compact(ba[i],start+i*4+8,addr);
- }
- else
- #endif
- {
- assert(s1l>=0);
- emit_mov2imm_compact(ba[i],addr,start+i*4+8,alt);
- if(s1h>=0) {
- if(s2h>=0) emit_cmp(s1h,s2h);
- else emit_test(s1h,s1h);
- emit_cmovne_reg(alt,addr);
- }
- if(s2l>=0) emit_cmp(s1l,s2l);
- else emit_test(s1l,s1l);
- emit_cmovne_reg(alt,addr);
- }
- }
- if((opcode[i]&0x3f)==5) // BNE
- {
- #ifdef HAVE_CMOV_IMM
- if(s1h<0) {
- if(s2l>=0) emit_cmp(s1l,s2l);
- else emit_test(s1l,s1l);
- emit_cmov2imm_e_ne_compact(start+i*4+8,ba[i],addr);
- }
- else
- #endif
- {
- assert(s1l>=0);
- emit_mov2imm_compact(start+i*4+8,addr,ba[i],alt);
- if(s1h>=0) {
- if(s2h>=0) emit_cmp(s1h,s2h);
- else emit_test(s1h,s1h);
- emit_cmovne_reg(alt,addr);
- }
- if(s2l>=0) emit_cmp(s1l,s2l);
- else emit_test(s1l,s1l);
- emit_cmovne_reg(alt,addr);
- }
- }
- if((opcode[i]&0x3f)==0x14) // BEQL
- {
- if(s1h>=0) {
- if(s2h>=0) emit_cmp(s1h,s2h);
- else emit_test(s1h,s1h);
- nottaken=(int)out;
- emit_jne(0);
- }
- if(s2l>=0) emit_cmp(s1l,s2l);
- else emit_test(s1l,s1l);
- if(nottaken) set_jump_target(nottaken,(int)out);
- nottaken=(int)out;
- emit_jne(0);
- }
- if((opcode[i]&0x3f)==0x15) // BNEL
- {
- if(s1h>=0) {
- if(s2h>=0) emit_cmp(s1h,s2h);
- else emit_test(s1h,s1h);
- taken=(int)out;
- emit_jne(0);
- }
- if(s2l>=0) emit_cmp(s1l,s2l);
- else emit_test(s1l,s1l);
- nottaken=(int)out;
- emit_jeq(0);
- if(taken) set_jump_target(taken,(int)out);
- }
- if((opcode[i]&0x3f)==6) // BLEZ
- {
- emit_mov2imm_compact(ba[i],alt,start+i*4+8,addr);
- emit_cmpimm(s1l,1);
- if(s1h>=0) emit_mov(addr,ntaddr);
- emit_cmovl_reg(alt,addr);
- if(s1h>=0) {
- emit_test(s1h,s1h);
- emit_cmovne_reg(ntaddr,addr);
- emit_cmovs_reg(alt,addr);
- }
- }
- if((opcode[i]&0x3f)==7) // BGTZ
- {
- emit_mov2imm_compact(ba[i],addr,start+i*4+8,ntaddr);
- emit_cmpimm(s1l,1);
- if(s1h>=0) emit_mov(addr,alt);
- emit_cmovl_reg(ntaddr,addr);
- if(s1h>=0) {
- emit_test(s1h,s1h);
- emit_cmovne_reg(alt,addr);
- emit_cmovs_reg(ntaddr,addr);
- }
- }
- if((opcode[i]&0x3f)==0x16) // BLEZL
- {
- assert((opcode[i]&0x3f)!=0x16);
- }
- if((opcode[i]&0x3f)==0x17) // BGTZL
- {
- assert((opcode[i]&0x3f)!=0x17);
- }
- assert(opcode[i]!=1); // BLTZ/BGEZ
-
- //FIXME: Check CSREG
- if(opcode[i]==0x11 && opcode2[i]==0x08 ) {
- if((source[i]&0x30000)==0) // BC1F
- {
- emit_mov2imm_compact(ba[i],addr,start+i*4+8,alt);
- emit_testimm(s1l,0x800000);
- emit_cmovne_reg(alt,addr);
- }
- if((source[i]&0x30000)==0x10000) // BC1T
- {
- emit_mov2imm_compact(ba[i],alt,start+i*4+8,addr);
- emit_testimm(s1l,0x800000);
- emit_cmovne_reg(alt,addr);
- }
- if((source[i]&0x30000)==0x20000) // BC1FL
- {
- emit_testimm(s1l,0x800000);
- nottaken=(int)out;
- emit_jne(0);
- }
- if((source[i]&0x30000)==0x30000) // BC1TL
- {
- emit_testimm(s1l,0x800000);
- nottaken=(int)out;
- emit_jeq(0);
- }
- }
-
- assert(i_regs->regmap[HOST_CCREG]==CCREG);
- wb_dirtys(regs[i].regmap,regs[i].is32,regs[i].dirty);
- if(likely[i]||unconditional)
- {
- emit_movimm(ba[i],HOST_BTREG);
- }
- else if(addr!=HOST_BTREG)
- {
- emit_mov(addr,HOST_BTREG);
- }
- void *branch_addr=out;
- emit_jmp(0);
- int target_addr=start+i*4+5;
- void *stub=out;
- void *compiled_target_addr=check_addr(target_addr);
- emit_extjump_ds((int)branch_addr,target_addr);
- if(compiled_target_addr) {
- set_jump_target((int)branch_addr,(int)compiled_target_addr);
- add_link(target_addr,stub);
- }
- else set_jump_target((int)branch_addr,(int)stub);
- if(likely[i]) {
- // Not-taken path
- set_jump_target((int)nottaken,(int)out);
- wb_dirtys(regs[i].regmap,regs[i].is32,regs[i].dirty);
- void *branch_addr=out;
- emit_jmp(0);
- int target_addr=start+i*4+8;
- void *stub=out;
- void *compiled_target_addr=check_addr(target_addr);
- emit_extjump_ds((int)branch_addr,target_addr);
- if(compiled_target_addr) {
- set_jump_target((int)branch_addr,(int)compiled_target_addr);
- add_link(target_addr,stub);
- }
- else set_jump_target((int)branch_addr,(int)stub);
- }
-}
-
-// Assemble the delay slot for the above
-static void pagespan_ds()
-{
- assem_debug("initial delay slot:\n");
- u_int vaddr=start+1;
- u_int page=get_page(vaddr);
- u_int vpage=get_vpage(vaddr);
- ll_add(jump_dirty+vpage,vaddr,(void *)out);
- do_dirty_stub_ds();
- ll_add(jump_in+page,vaddr,(void *)out);
- assert(regs[0].regmap_entry[HOST_CCREG]==CCREG);
- if(regs[0].regmap[HOST_CCREG]!=CCREG)
- wb_register(CCREG,regs[0].regmap_entry,regs[0].wasdirty,regs[0].was32);
- if(regs[0].regmap[HOST_BTREG]!=BTREG)
- emit_writeword(HOST_BTREG,(int)&branch_target);
- load_regs(regs[0].regmap_entry,regs[0].regmap,regs[0].was32,rs1[0],rs2[0]);
- address_generation(0,®s[0],regs[0].regmap_entry);
- if(itype[0]==STORE||itype[0]==STORELR||(opcode[0]&0x3b)==0x39||(opcode[0]&0x3b)==0x3a)
- load_regs(regs[0].regmap_entry,regs[0].regmap,regs[0].was32,INVCP,INVCP);
- cop1_usable=0;
- is_delayslot=0;
- switch(itype[0]) {
- case ALU:
- alu_assemble(0,®s[0]);break;
- case IMM16:
- imm16_assemble(0,®s[0]);break;
- case SHIFT:
- shift_assemble(0,®s[0]);break;
- case SHIFTIMM:
- shiftimm_assemble(0,®s[0]);break;
- case LOAD:
- load_assemble(0,®s[0]);break;
- case LOADLR:
- loadlr_assemble(0,®s[0]);break;
- case STORE:
- store_assemble(0,®s[0]);break;
- case STORELR:
- storelr_assemble(0,®s[0]);break;
- case COP0:
- cop0_assemble(0,®s[0]);break;
- case COP1:
- cop1_assemble(0,®s[0]);break;
- case C1LS:
- c1ls_assemble(0,®s[0]);break;
- case COP2:
- cop2_assemble(0,®s[0]);break;
- case C2LS:
- c2ls_assemble(0,®s[0]);break;
- case C2OP:
- c2op_assemble(0,®s[0]);break;
- case FCONV:
- fconv_assemble(0,®s[0]);break;
- case FLOAT:
- float_assemble(0,®s[0]);break;
- case FCOMP:
- fcomp_assemble(0,®s[0]);break;
- case MULTDIV:
- multdiv_assemble(0,®s[0]);break;
- case MOV:
- mov_assemble(0,®s[0]);break;
- case SYSCALL:
- case HLECALL:
- case INTCALL:
- case SPAN:
- case UJUMP:
- case RJUMP:
- case CJUMP:
- case SJUMP:
- case FJUMP:
- printf("Jump in the delay slot. This is probably a bug.\n");
- }
- int btaddr=get_reg(regs[0].regmap,BTREG);
- if(btaddr<0) {
- btaddr=get_reg(regs[0].regmap,-1);
- emit_readword((int)&branch_target,btaddr);
- }
- assert(btaddr!=HOST_CCREG);
- if(regs[0].regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
-#ifdef HOST_IMM8
- emit_movimm(start+4,HOST_TEMPREG);
- emit_cmp(btaddr,HOST_TEMPREG);
-#else
- emit_cmpimm(btaddr,start+4);
-#endif
- int branch=(int)out;
- emit_jeq(0);
- store_regs_bt(regs[0].regmap,regs[0].is32,regs[0].dirty,-1);
- emit_jmp(jump_vaddr_reg[btaddr]);
- set_jump_target(branch,(int)out);
- store_regs_bt(regs[0].regmap,regs[0].is32,regs[0].dirty,start+4);
- load_regs_bt(regs[0].regmap,regs[0].is32,regs[0].dirty,start+4);
-}
-
-// Basic liveness analysis for MIPS registers
-void unneeded_registers(int istart,int iend,int r)
-{
- int i;
- uint64_t u,uu,b,bu;
- uint64_t temp_u,temp_uu;
- uint64_t tdep;
- if(iend==slen-1) {
- u=1;uu=1;
- }else{
- u=unneeded_reg[iend+1];
- uu=unneeded_reg_upper[iend+1];
- u=1;uu=1;
- }
- for (i=iend;i>=istart;i--)
- {
- //printf("unneeded registers i=%d (%d,%d) r=%d\n",i,istart,iend,r);
- if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
- {
- // If subroutine call, flag return address as a possible branch target
- if(rt1[i]==31 && i<slen-2) bt[i+2]=1;
-
- if(ba[i]<start || ba[i]>=(start+slen*4))
- {
- // Branch out of this block, flush all regs
- u=1;
- uu=1;
- /* Hexagon hack
- if(itype[i]==UJUMP&&rt1[i]==31)
- {
- uu=u=0x300C00F; // Discard at, v0-v1, t6-t9
- }
- if(itype[i]==RJUMP&&rs1[i]==31)
- {
- uu=u=0x300C0F3; // Discard at, a0-a3, t6-t9
- }
- if(start>0x80000400&&start<0x80000000+RAM_SIZE) {
- if(itype[i]==UJUMP&&rt1[i]==31)
- {
- //uu=u=0x30300FF0FLL; // Discard at, v0-v1, t0-t9, lo, hi
- uu=u=0x300FF0F; // Discard at, v0-v1, t0-t9
- }
- if(itype[i]==RJUMP&&rs1[i]==31)
- {
- //uu=u=0x30300FFF3LL; // Discard at, a0-a3, t0-t9, lo, hi
- uu=u=0x300FFF3; // Discard at, a0-a3, t0-t9
- }
- }*/
- branch_unneeded_reg[i]=u;
- branch_unneeded_reg_upper[i]=uu;
- // Merge in delay slot
- tdep=(~uu>>rt1[i+1])&1;
- u|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
- uu|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
- u&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
- uu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
- uu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
- u|=1;uu|=1;
- // If branch is "likely" (and conditional)
- // then we skip the delay slot on the fall-thru path
- if(likely[i]) {
- if(i<slen-1) {
- u&=unneeded_reg[i+2];
- uu&=unneeded_reg_upper[i+2];
- }
- else
- {
- u=1;
- uu=1;
- }
- }
- }
- else
- {
- // Internal branch, flag target
- bt[(ba[i]-start)>>2]=1;
- if(ba[i]<=start+i*4) {
- // Backward branch
- if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000)
- {
- // Unconditional branch
- temp_u=1;temp_uu=1;
- } else {
- // Conditional branch (not taken case)
- temp_u=unneeded_reg[i+2];
- temp_uu=unneeded_reg_upper[i+2];
- }
- // Merge in delay slot
- tdep=(~temp_uu>>rt1[i+1])&1;
- temp_u|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
- temp_uu|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
- temp_u&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
- temp_uu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
- temp_uu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
- temp_u|=1;temp_uu|=1;
- // If branch is "likely" (and conditional)
- // then we skip the delay slot on the fall-thru path
- if(likely[i]) {
- if(i<slen-1) {
- temp_u&=unneeded_reg[i+2];
- temp_uu&=unneeded_reg_upper[i+2];
- }
- else
- {
- temp_u=1;
- temp_uu=1;
- }
- }
- tdep=(~temp_uu>>rt1[i])&1;
- temp_u|=(1LL<<rt1[i])|(1LL<<rt2[i]);
- temp_uu|=(1LL<<rt1[i])|(1LL<<rt2[i]);
- temp_u&=~((1LL<<rs1[i])|(1LL<<rs2[i]));
- temp_uu&=~((1LL<<us1[i])|(1LL<<us2[i]));
- temp_uu&=~((tdep<<dep1[i])|(tdep<<dep2[i]));
- temp_u|=1;temp_uu|=1;
- unneeded_reg[i]=temp_u;
- unneeded_reg_upper[i]=temp_uu;
- // Only go three levels deep. This recursion can take an
- // excessive amount of time if there are a lot of nested loops.
- if(r<2) {
- unneeded_registers((ba[i]-start)>>2,i-1,r+1);
- }else{
- unneeded_reg[(ba[i]-start)>>2]=1;
- unneeded_reg_upper[(ba[i]-start)>>2]=1;
- }
- } /*else*/ if(1) {
- if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000)
- {
- // Unconditional branch
- u=unneeded_reg[(ba[i]-start)>>2];
- uu=unneeded_reg_upper[(ba[i]-start)>>2];
- branch_unneeded_reg[i]=u;
- branch_unneeded_reg_upper[i]=uu;
- //u=1;
- //uu=1;
- //branch_unneeded_reg[i]=u;
- //branch_unneeded_reg_upper[i]=uu;
- // Merge in delay slot
- tdep=(~uu>>rt1[i+1])&1;
- u|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
- uu|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
- u&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
- uu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
- uu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
- u|=1;uu|=1;
- } else {
- // Conditional branch
- b=unneeded_reg[(ba[i]-start)>>2];
- bu=unneeded_reg_upper[(ba[i]-start)>>2];
- branch_unneeded_reg[i]=b;
- branch_unneeded_reg_upper[i]=bu;
- //b=1;
- //bu=1;
- //branch_unneeded_reg[i]=b;
- //branch_unneeded_reg_upper[i]=bu;
- // Branch delay slot
- tdep=(~uu>>rt1[i+1])&1;
- b|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
- bu|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
- b&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
- bu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
- bu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
- b|=1;bu|=1;
- // If branch is "likely" then we skip the
- // delay slot on the fall-thru path
- if(likely[i]) {
- u=b;
- uu=bu;
- if(i<slen-1) {
- u&=unneeded_reg[i+2];
- uu&=unneeded_reg_upper[i+2];
- //u=1;
- //uu=1;
- }
- } else {
- u&=b;
- uu&=bu;
- //u=1;
- //uu=1;
- }
- if(i<slen-1) {
- branch_unneeded_reg[i]&=unneeded_reg[i+2];
- branch_unneeded_reg_upper[i]&=unneeded_reg_upper[i+2];
- //branch_unneeded_reg[i]=1;
- //branch_unneeded_reg_upper[i]=1;
- } else {
- branch_unneeded_reg[i]=1;
- branch_unneeded_reg_upper[i]=1;
- }
- }
- }
- }
- }
- else if(itype[i]==SYSCALL||itype[i]==HLECALL||itype[i]==INTCALL)
- {
- // SYSCALL instruction (software interrupt)
- u=1;
- uu=1;
- }
- else if(itype[i]==COP0 && (source[i]&0x3f)==0x18)
- {
- // ERET instruction (return from interrupt)
- u=1;
- uu=1;
- }
- //u=uu=1; // DEBUG
- tdep=(~uu>>rt1[i])&1;
- // Written registers are unneeded
- u|=1LL<<rt1[i];
- u|=1LL<<rt2[i];
- uu|=1LL<<rt1[i];
- uu|=1LL<<rt2[i];
- // Accessed registers are needed
- u&=~(1LL<<rs1[i]);
- u&=~(1LL<<rs2[i]);
- uu&=~(1LL<<us1[i]);
- uu&=~(1LL<<us2[i]);
- // Source-target dependencies
- uu&=~(tdep<<dep1[i]);
- uu&=~(tdep<<dep2[i]);
- // R0 is always unneeded
- u|=1;uu|=1;
- // Save it
- unneeded_reg[i]=u;
- unneeded_reg_upper[i]=uu;
- /*
- printf("ur (%d,%d) %x: ",istart,iend,start+i*4);
- printf("U:");
- int r;
- for(r=1;r<=CCREG;r++) {
- if((unneeded_reg[i]>>r)&1) {
- if(r==HIREG) printf(" HI");
- else if(r==LOREG) printf(" LO");
- else printf(" r%d",r);
- }
- }
- printf(" UU:");
- for(r=1;r<=CCREG;r++) {
- if(((unneeded_reg_upper[i]&~unneeded_reg[i])>>r)&1) {
- if(r==HIREG) printf(" HI");
- else if(r==LOREG) printf(" LO");
- else printf(" r%d",r);
- }
- }
- printf("\n");*/
- }
-#ifdef FORCE32
- for (i=iend;i>=istart;i--)
- {
- unneeded_reg_upper[i]=branch_unneeded_reg_upper[i]=-1LL;
- }
-#endif
-}
-
-// Identify registers which are likely to contain 32-bit values
-// This is used to predict whether any branches will jump to a
-// location with 64-bit values in registers.
-static void provisional_32bit()
-{
- int i,j;
- uint64_t is32=1;
- uint64_t lastbranch=1;
-
- for(i=0;i<slen;i++)
- {
- if(i>0) {
- if(itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP) {
- if(i>1) is32=lastbranch;
- else is32=1;
- }
- }
- if(i>1)
- {
- if(itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP) {
- if(likely[i-2]) {
- if(i>2) is32=lastbranch;
- else is32=1;
- }
- }
- if((opcode[i-2]&0x2f)==0x05) // BNE/BNEL
- {
- if(rs1[i-2]==0||rs2[i-2]==0)
- {
- if(rs1[i-2]) {
- is32|=1LL<<rs1[i-2];
- }
- if(rs2[i-2]) {
- is32|=1LL<<rs2[i-2];
- }
- }
- }
- }
- // If something jumps here with 64-bit values
- // then promote those registers to 64 bits
- if(bt[i])
- {
- uint64_t temp_is32=is32;
- for(j=i-1;j>=0;j--)
- {
- if(ba[j]==start+i*4)
- //temp_is32&=branch_regs[j].is32;
- temp_is32&=p32[j];
- }
- for(j=i;j<slen;j++)
- {
- if(ba[j]==start+i*4)
- temp_is32=1;
- }
- is32=temp_is32;
- }
- int type=itype[i];
- int op=opcode[i];
- int op2=opcode2[i];
- int rt=rt1[i];
- int s1=rs1[i];
- int s2=rs2[i];
- if(type==UJUMP||type==RJUMP||type==CJUMP||type==SJUMP||type==FJUMP) {
- // Branches don't write registers, consider the delay slot instead.
- type=itype[i+1];
- op=opcode[i+1];
- op2=opcode2[i+1];
- rt=rt1[i+1];
- s1=rs1[i+1];
- s2=rs2[i+1];
- lastbranch=is32;
- }
- switch(type) {
- case LOAD:
- if(opcode[i]==0x27||opcode[i]==0x37|| // LWU/LD
- opcode[i]==0x1A||opcode[i]==0x1B) // LDL/LDR
- is32&=~(1LL<<rt);
- else
- is32|=1LL<<rt;
- break;
- case STORE:
- case STORELR:
- break;
- case LOADLR:
- if(op==0x1a||op==0x1b) is32&=~(1LL<<rt); // LDR/LDL
- if(op==0x22) is32|=1LL<<rt; // LWL
- break;
- case IMM16:
- if (op==0x08||op==0x09|| // ADDI/ADDIU
- op==0x0a||op==0x0b|| // SLTI/SLTIU
- op==0x0c|| // ANDI
- op==0x0f) // LUI
- {
- is32|=1LL<<rt;
- }
- if(op==0x18||op==0x19) { // DADDI/DADDIU
- is32&=~(1LL<<rt);
- //if(imm[i]==0)
- // is32|=((is32>>s1)&1LL)<<rt;
- }
- if(op==0x0d||op==0x0e) { // ORI/XORI
- uint64_t sr=((is32>>s1)&1LL);
- is32&=~(1LL<<rt);
- is32|=sr<<rt;
- }
- break;
- case UJUMP:
- break;
- case RJUMP:
- break;
- case CJUMP:
- break;
- case SJUMP:
- break;
- case FJUMP:
- break;
- case ALU:
- if(op2>=0x20&&op2<=0x23) { // ADD/ADDU/SUB/SUBU
- is32|=1LL<<rt;
- }
- if(op2==0x2a||op2==0x2b) { // SLT/SLTU
- is32|=1LL<<rt;
- }
- else if(op2>=0x24&&op2<=0x27) { // AND/OR/XOR/NOR
- uint64_t sr=((is32>>s1)&(is32>>s2)&1LL);
- is32&=~(1LL<<rt);
- is32|=sr<<rt;
- }
- else if(op2>=0x2c&&op2<=0x2d) { // DADD/DADDU
- if(s1==0&&s2==0) {
- is32|=1LL<<rt;
- }
- else if(s2==0) {
- uint64_t sr=((is32>>s1)&1LL);
- is32&=~(1LL<<rt);
- is32|=sr<<rt;
- }
- else if(s1==0) {
- uint64_t sr=((is32>>s2)&1LL);
- is32&=~(1LL<<rt);
- is32|=sr<<rt;
- }
- else {
- is32&=~(1LL<<rt);
- }
- }
- else if(op2>=0x2e&&op2<=0x2f) { // DSUB/DSUBU
- if(s1==0&&s2==0) {
- is32|=1LL<<rt;
- }
- else if(s2==0) {
- uint64_t sr=((is32>>s1)&1LL);
- is32&=~(1LL<<rt);
- is32|=sr<<rt;
- }
- else {
- is32&=~(1LL<<rt);
- }
- }
- break;
- case MULTDIV:
- if (op2>=0x1c&&op2<=0x1f) { // DMULT/DMULTU/DDIV/DDIVU
- is32&=~((1LL<<HIREG)|(1LL<<LOREG));
- }
- else {
- is32|=(1LL<<HIREG)|(1LL<<LOREG);
- }
- break;
- case MOV:
- {
- uint64_t sr=((is32>>s1)&1LL);
- is32&=~(1LL<<rt);
- is32|=sr<<rt;
- }
- break;
- case SHIFT:
- if(op2>=0x14&&op2<=0x17) is32&=~(1LL<<rt); // DSLLV/DSRLV/DSRAV
- else is32|=1LL<<rt; // SLLV/SRLV/SRAV
- break;
- case SHIFTIMM:
- is32|=1LL<<rt;
- // DSLL/DSRL/DSRA/DSLL32/DSRL32 but not DSRA32 have 64-bit result
- if(op2>=0x38&&op2<0x3f) is32&=~(1LL<<rt);
- break;
- case COP0:
- if(op2==0) is32|=1LL<<rt; // MFC0
- break;
- case COP1:
- case COP2:
- if(op2==0) is32|=1LL<<rt; // MFC1
- if(op2==1) is32&=~(1LL<<rt); // DMFC1
- if(op2==2) is32|=1LL<<rt; // CFC1
- break;
- case C1LS:
- case C2LS:
- break;
- case FLOAT:
- case FCONV:
- break;
- case FCOMP:
- break;
- case C2OP:
- case SYSCALL:
- case HLECALL:
- break;
- default:
- break;
- }
- is32|=1;
- p32[i]=is32;
-
- if(i>0)
- {
- if(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000)
- {
- if(rt1[i-1]==31) // JAL/JALR
- {
- // Subroutine call will return here, don't alloc any registers
- is32=1;
- }
- else if(i+1<slen)
- {
- // Internal branch will jump here, match registers to caller
- is32=0x3FFFFFFFFLL;
- }
- }
- }
- }
-}
-
-// Identify registers which may be assumed to contain 32-bit values
-// and where optimizations will rely on this.
-// This is used to determine whether backward branches can safely
-// jump to a location with 64-bit values in registers.
-static void provisional_r32()
-{
- u_int r32=0;
- int i;
-
- for (i=slen-1;i>=0;i--)
- {
- int hr;
- if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
- {
- if(ba[i]<start || ba[i]>=(start+slen*4))
- {
- // Branch out of this block, don't need anything
- r32=0;
- }
- else
- {
- // Internal branch
- // Need whatever matches the target
- // (and doesn't get overwritten by the delay slot instruction)
- r32=0;
- int t=(ba[i]-start)>>2;
- if(ba[i]>start+i*4) {
- // Forward branch
- //if(!(requires_32bit[t]&~regs[i].was32))
- // r32|=requires_32bit[t]&(~(1LL<<rt1[i+1]))&(~(1LL<<rt2[i+1]));
- if(!(pr32[t]&~regs[i].was32))
- r32|=pr32[t]&(~(1LL<<rt1[i+1]))&(~(1LL<<rt2[i+1]));
- }else{
- // Backward branch
- if(!(regs[t].was32&~unneeded_reg_upper[t]&~regs[i].was32))
- r32|=regs[t].was32&~unneeded_reg_upper[t]&(~(1LL<<rt1[i+1]))&(~(1LL<<rt2[i+1]));
- }
- }
- // Conditional branch may need registers for following instructions
- if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&(source[i]>>16)!=0x1000)
- {
- if(i<slen-2) {
- //r32|=requires_32bit[i+2];
- r32|=pr32[i+2];
- r32&=regs[i].was32;
- // Mark this address as a branch target since it may be called
- // upon return from interrupt
- //bt[i+2]=1;
- }
- }
- // Merge in delay slot
- if(!likely[i]) {
- // These are overwritten unless the branch is "likely"
- // and the delay slot is nullified if not taken
- r32&=~(1LL<<rt1[i+1]);
- r32&=~(1LL<<rt2[i+1]);
- }
- // Assume these are needed (delay slot)
- if(us1[i+1]>0)
- {
- if((regs[i].was32>>us1[i+1])&1) r32|=1LL<<us1[i+1];
- }
- if(us2[i+1]>0)
- {
- if((regs[i].was32>>us2[i+1])&1) r32|=1LL<<us2[i+1];
- }
- if(dep1[i+1]&&!((unneeded_reg_upper[i]>>dep1[i+1])&1))
- {
- if((regs[i].was32>>dep1[i+1])&1) r32|=1LL<<dep1[i+1];
- }
- if(dep2[i+1]&&!((unneeded_reg_upper[i]>>dep2[i+1])&1))
- {
- if((regs[i].was32>>dep2[i+1])&1) r32|=1LL<<dep2[i+1];
- }
- }
- else if(itype[i]==SYSCALL||itype[i]==HLECALL||itype[i]==INTCALL)
- {
- // SYSCALL instruction (software interrupt)
- r32=0;
- }
- else if(itype[i]==COP0 && (source[i]&0x3f)==0x18)
- {
- // ERET instruction (return from interrupt)
- r32=0;
- }
- // Check 32 bits
- r32&=~(1LL<<rt1[i]);
- r32&=~(1LL<<rt2[i]);
- if(us1[i]>0)
- {
- if((regs[i].was32>>us1[i])&1) r32|=1LL<<us1[i];
- }
- if(us2[i]>0)
- {
- if((regs[i].was32>>us2[i])&1) r32|=1LL<<us2[i];
- }
- if(dep1[i]&&!((unneeded_reg_upper[i]>>dep1[i])&1))
- {
- if((regs[i].was32>>dep1[i])&1) r32|=1LL<<dep1[i];
- }
- if(dep2[i]&&!((unneeded_reg_upper[i]>>dep2[i])&1))
- {
- if((regs[i].was32>>dep2[i])&1) r32|=1LL<<dep2[i];
- }
- //requires_32bit[i]=r32;
- pr32[i]=r32;
-
- // Dirty registers which are 32-bit, require 32-bit input
- // as they will be written as 32-bit values
- for(hr=0;hr<HOST_REGS;hr++)
- {
- if(regs[i].regmap_entry[hr]>0&®s[i].regmap_entry[hr]<64) {
- if((regs[i].was32>>regs[i].regmap_entry[hr])&(regs[i].wasdirty>>hr)&1) {
- if(!((unneeded_reg_upper[i]>>regs[i].regmap_entry[hr])&1))
- pr32[i]|=1LL<<regs[i].regmap_entry[hr];
- //requires_32bit[i]|=1LL<<regs[i].regmap_entry[hr];
- }
- }
- }
- }
-}
-
-// Write back dirty registers as soon as we will no longer modify them,
-// so that we don't end up with lots of writes at the branches.
-void clean_registers(int istart,int iend,int wr)
-{
- int i;
- int r;
- u_int will_dirty_i,will_dirty_next,temp_will_dirty;
- u_int wont_dirty_i,wont_dirty_next,temp_wont_dirty;
- if(iend==slen-1) {
- will_dirty_i=will_dirty_next=0;
- wont_dirty_i=wont_dirty_next=0;
- }else{
- will_dirty_i=will_dirty_next=will_dirty[iend+1];
- wont_dirty_i=wont_dirty_next=wont_dirty[iend+1];
- }
- for (i=iend;i>=istart;i--)
- {
- if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
- {
- if(ba[i]<start || ba[i]>=(start+slen*4))
- {
- // Branch out of this block, flush all regs
- if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000)
- {
- // Unconditional branch
- will_dirty_i=0;
- wont_dirty_i=0;
- // Merge in delay slot (will dirty)
- for(r=0;r<HOST_REGS;r++) {
- if(r!=EXCLUDE_REG) {
- if((branch_regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
- if((branch_regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
- if((branch_regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
- if((branch_regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
- if((branch_regs[i].regmap[r]&63)>33) will_dirty_i&=~(1<<r);
- if(branch_regs[i].regmap[r]<=0) will_dirty_i&=~(1<<r);
- if(branch_regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
- if((regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
- if((regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
- if((regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
- if((regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
- if((regs[i].regmap[r]&63)>33) will_dirty_i&=~(1<<r);
- if(regs[i].regmap[r]<=0) will_dirty_i&=~(1<<r);
- if(regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
- }
- }
- }
- else
- {
- // Conditional branch
- will_dirty_i=0;
- wont_dirty_i=wont_dirty_next;
- // Merge in delay slot (will dirty)
- for(r=0;r<HOST_REGS;r++) {
- if(r!=EXCLUDE_REG) {
- if(!likely[i]) {
- // Might not dirty if likely branch is not taken
- if((branch_regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
- if((branch_regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
- if((branch_regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
- if((branch_regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
- if((branch_regs[i].regmap[r]&63)>33) will_dirty_i&=~(1<<r);
- if(branch_regs[i].regmap[r]==0) will_dirty_i&=~(1<<r);
- if(branch_regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
- //if((regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
- //if((regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
- if((regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
- if((regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
- if((regs[i].regmap[r]&63)>33) will_dirty_i&=~(1<<r);
- if(regs[i].regmap[r]<=0) will_dirty_i&=~(1<<r);
- if(regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
- }
- }
- }
- }
- // Merge in delay slot (wont dirty)
- for(r=0;r<HOST_REGS;r++) {
- if(r!=EXCLUDE_REG) {
- if((regs[i].regmap[r]&63)==rt1[i]) wont_dirty_i|=1<<r;
- if((regs[i].regmap[r]&63)==rt2[i]) wont_dirty_i|=1<<r;
- if((regs[i].regmap[r]&63)==rt1[i+1]) wont_dirty_i|=1<<r;
- if((regs[i].regmap[r]&63)==rt2[i+1]) wont_dirty_i|=1<<r;
- if(regs[i].regmap[r]==CCREG) wont_dirty_i|=1<<r;
- if((branch_regs[i].regmap[r]&63)==rt1[i]) wont_dirty_i|=1<<r;
- if((branch_regs[i].regmap[r]&63)==rt2[i]) wont_dirty_i|=1<<r;
- if((branch_regs[i].regmap[r]&63)==rt1[i+1]) wont_dirty_i|=1<<r;
- if((branch_regs[i].regmap[r]&63)==rt2[i+1]) wont_dirty_i|=1<<r;
- if(branch_regs[i].regmap[r]==CCREG) wont_dirty_i|=1<<r;
- }
- }
- if(wr) {
- #ifndef DESTRUCTIVE_WRITEBACK
- branch_regs[i].dirty&=wont_dirty_i;
- #endif
- branch_regs[i].dirty|=will_dirty_i;
- }
- }
- else
- {
- // Internal branch
- if(ba[i]<=start+i*4) {
- // Backward branch
- if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000)
- {
- // Unconditional branch
- temp_will_dirty=0;
- temp_wont_dirty=0;
- // Merge in delay slot (will dirty)
- for(r=0;r<HOST_REGS;r++) {
- if(r!=EXCLUDE_REG) {
- if((branch_regs[i].regmap[r]&63)==rt1[i]) temp_will_dirty|=1<<r;
- if((branch_regs[i].regmap[r]&63)==rt2[i]) temp_will_dirty|=1<<r;
- if((branch_regs[i].regmap[r]&63)==rt1[i+1]) temp_will_dirty|=1<<r;
- if((branch_regs[i].regmap[r]&63)==rt2[i+1]) temp_will_dirty|=1<<r;
- if((branch_regs[i].regmap[r]&63)>33) temp_will_dirty&=~(1<<r);
- if(branch_regs[i].regmap[r]<=0) temp_will_dirty&=~(1<<r);
- if(branch_regs[i].regmap[r]==CCREG) temp_will_dirty|=1<<r;
- if((regs[i].regmap[r]&63)==rt1[i]) temp_will_dirty|=1<<r;
- if((regs[i].regmap[r]&63)==rt2[i]) temp_will_dirty|=1<<r;
- if((regs[i].regmap[r]&63)==rt1[i+1]) temp_will_dirty|=1<<r;
- if((regs[i].regmap[r]&63)==rt2[i+1]) temp_will_dirty|=1<<r;
- if((regs[i].regmap[r]&63)>33) temp_will_dirty&=~(1<<r);
- if(regs[i].regmap[r]<=0) temp_will_dirty&=~(1<<r);
- if(regs[i].regmap[r]==CCREG) temp_will_dirty|=1<<r;
- }
- }
- } else {
- // Conditional branch (not taken case)
- temp_will_dirty=will_dirty_next;
- temp_wont_dirty=wont_dirty_next;
- // Merge in delay slot (will dirty)
- for(r=0;r<HOST_REGS;r++) {
- if(r!=EXCLUDE_REG) {
- if(!likely[i]) {
- // Will not dirty if likely branch is not taken
- if((branch_regs[i].regmap[r]&63)==rt1[i]) temp_will_dirty|=1<<r;
- if((branch_regs[i].regmap[r]&63)==rt2[i]) temp_will_dirty|=1<<r;
- if((branch_regs[i].regmap[r]&63)==rt1[i+1]) temp_will_dirty|=1<<r;
- if((branch_regs[i].regmap[r]&63)==rt2[i+1]) temp_will_dirty|=1<<r;
- if((branch_regs[i].regmap[r]&63)>33) temp_will_dirty&=~(1<<r);
- if(branch_regs[i].regmap[r]==0) temp_will_dirty&=~(1<<r);
- if(branch_regs[i].regmap[r]==CCREG) temp_will_dirty|=1<<r;
- //if((regs[i].regmap[r]&63)==rt1[i]) temp_will_dirty|=1<<r;
- //if((regs[i].regmap[r]&63)==rt2[i]) temp_will_dirty|=1<<r;
- if((regs[i].regmap[r]&63)==rt1[i+1]) temp_will_dirty|=1<<r;
- if((regs[i].regmap[r]&63)==rt2[i+1]) temp_will_dirty|=1<<r;
- if((regs[i].regmap[r]&63)>33) temp_will_dirty&=~(1<<r);
- if(regs[i].regmap[r]<=0) temp_will_dirty&=~(1<<r);
- if(regs[i].regmap[r]==CCREG) temp_will_dirty|=1<<r;
- }
- }
- }
- }
- // Merge in delay slot (wont dirty)
- for(r=0;r<HOST_REGS;r++) {
- if(r!=EXCLUDE_REG) {
- if((regs[i].regmap[r]&63)==rt1[i]) temp_wont_dirty|=1<<r;
- if((regs[i].regmap[r]&63)==rt2[i]) temp_wont_dirty|=1<<r;
- if((regs[i].regmap[r]&63)==rt1[i+1]) temp_wont_dirty|=1<<r;
- if((regs[i].regmap[r]&63)==rt2[i+1]) temp_wont_dirty|=1<<r;
- if(regs[i].regmap[r]==CCREG) temp_wont_dirty|=1<<r;
- if((branch_regs[i].regmap[r]&63)==rt1[i]) temp_wont_dirty|=1<<r;
- if((branch_regs[i].regmap[r]&63)==rt2[i]) temp_wont_dirty|=1<<r;
- if((branch_regs[i].regmap[r]&63)==rt1[i+1]) temp_wont_dirty|=1<<r;
- if((branch_regs[i].regmap[r]&63)==rt2[i+1]) temp_wont_dirty|=1<<r;
- if(branch_regs[i].regmap[r]==CCREG) temp_wont_dirty|=1<<r;
- }
- }
- // Deal with changed mappings
- if(i<iend) {
- for(r=0;r<HOST_REGS;r++) {
- if(r!=EXCLUDE_REG) {
- if(regs[i].regmap[r]!=regmap_pre[i][r]) {
- temp_will_dirty&=~(1<<r);
- temp_wont_dirty&=~(1<<r);
- if((regmap_pre[i][r]&63)>0 && (regmap_pre[i][r]&63)<34) {
- temp_will_dirty|=((unneeded_reg[i]>>(regmap_pre[i][r]&63))&1)<<r;
- temp_wont_dirty|=((unneeded_reg[i]>>(regmap_pre[i][r]&63))&1)<<r;
- } else {
- temp_will_dirty|=1<<r;
- temp_wont_dirty|=1<<r;
- }
- }
- }
- }
- }
- if(wr) {
- will_dirty[i]=temp_will_dirty;
- wont_dirty[i]=temp_wont_dirty;
- clean_registers((ba[i]-start)>>2,i-1,0);
- }else{
- // Limit recursion. It can take an excessive amount
- // of time if there are a lot of nested loops.
- will_dirty[(ba[i]-start)>>2]=0;
- wont_dirty[(ba[i]-start)>>2]=-1;
- }
- }
- /*else*/ if(1)
- {
- if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000)
- {
- // Unconditional branch
- will_dirty_i=0;
- wont_dirty_i=0;
- //if(ba[i]>start+i*4) { // Disable recursion (for debugging)
- for(r=0;r<HOST_REGS;r++) {
- if(r!=EXCLUDE_REG) {
- if(branch_regs[i].regmap[r]==regs[(ba[i]-start)>>2].regmap_entry[r]) {
- will_dirty_i|=will_dirty[(ba[i]-start)>>2]&(1<<r);
- wont_dirty_i|=wont_dirty[(ba[i]-start)>>2]&(1<<r);
- }
- }
- }
- //}
- // Merge in delay slot
- for(r=0;r<HOST_REGS;r++) {
- if(r!=EXCLUDE_REG) {
- if((branch_regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
- if((branch_regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
- if((branch_regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
- if((branch_regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
- if((branch_regs[i].regmap[r]&63)>33) will_dirty_i&=~(1<<r);
- if(branch_regs[i].regmap[r]<=0) will_dirty_i&=~(1<<r);
- if(branch_regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
- if((regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
- if((regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
- if((regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
- if((regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
- if((regs[i].regmap[r]&63)>33) will_dirty_i&=~(1<<r);
- if(regs[i].regmap[r]<=0) will_dirty_i&=~(1<<r);
- if(regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
- }
- }
- } else {
- // Conditional branch
- will_dirty_i=will_dirty_next;
- wont_dirty_i=wont_dirty_next;
- //if(ba[i]>start+i*4) { // Disable recursion (for debugging)
- for(r=0;r<HOST_REGS;r++) {
- if(r!=EXCLUDE_REG) {
- if(branch_regs[i].regmap[r]==regs[(ba[i]-start)>>2].regmap_entry[r]) {
- will_dirty_i&=will_dirty[(ba[i]-start)>>2]&(1<<r);
- wont_dirty_i|=wont_dirty[(ba[i]-start)>>2]&(1<<r);
- }
- else
- {
- will_dirty_i&=~(1<<r);
- }
- // Treat delay slot as part of branch too
- /*if(regs[i+1].regmap[r]==regs[(ba[i]-start)>>2].regmap_entry[r]) {
- will_dirty[i+1]&=will_dirty[(ba[i]-start)>>2]&(1<<r);
- wont_dirty[i+1]|=wont_dirty[(ba[i]-start)>>2]&(1<<r);
- }
- else
- {
- will_dirty[i+1]&=~(1<<r);
- }*/
- }
- }
- //}
- // Merge in delay slot
- for(r=0;r<HOST_REGS;r++) {
- if(r!=EXCLUDE_REG) {
- if(!likely[i]) {
- // Might not dirty if likely branch is not taken
- if((branch_regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
- if((branch_regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
- if((branch_regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
- if((branch_regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
- if((branch_regs[i].regmap[r]&63)>33) will_dirty_i&=~(1<<r);
- if(branch_regs[i].regmap[r]<=0) will_dirty_i&=~(1<<r);
- if(branch_regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
- //if((regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
- //if((regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
- if((regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
- if((regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
- if((regs[i].regmap[r]&63)>33) will_dirty_i&=~(1<<r);
- if(regs[i].regmap[r]<=0) will_dirty_i&=~(1<<r);
- if(regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
- }
- }
- }
- }
- // Merge in delay slot
- for(r=0;r<HOST_REGS;r++) {
- if(r!=EXCLUDE_REG) {
- if((regs[i].regmap[r]&63)==rt1[i]) wont_dirty_i|=1<<r;
- if((regs[i].regmap[r]&63)==rt2[i]) wont_dirty_i|=1<<r;
- if((regs[i].regmap[r]&63)==rt1[i+1]) wont_dirty_i|=1<<r;
- if((regs[i].regmap[r]&63)==rt2[i+1]) wont_dirty_i|=1<<r;
- if(regs[i].regmap[r]==CCREG) wont_dirty_i|=1<<r;
- if((branch_regs[i].regmap[r]&63)==rt1[i]) wont_dirty_i|=1<<r;
- if((branch_regs[i].regmap[r]&63)==rt2[i]) wont_dirty_i|=1<<r;
- if((branch_regs[i].regmap[r]&63)==rt1[i+1]) wont_dirty_i|=1<<r;
- if((branch_regs[i].regmap[r]&63)==rt2[i+1]) wont_dirty_i|=1<<r;
- if(branch_regs[i].regmap[r]==CCREG) wont_dirty_i|=1<<r;
- }
- }
- if(wr) {
- #ifndef DESTRUCTIVE_WRITEBACK
- branch_regs[i].dirty&=wont_dirty_i;
- #endif
- branch_regs[i].dirty|=will_dirty_i;
- }
- }
- }
- }
- else if(itype[i]==SYSCALL||itype[i]==HLECALL||itype[i]==INTCALL)
- {
- // SYSCALL instruction (software interrupt)
- will_dirty_i=0;
- wont_dirty_i=0;
- }
- else if(itype[i]==COP0 && (source[i]&0x3f)==0x18)
- {
- // ERET instruction (return from interrupt)
- will_dirty_i=0;
- wont_dirty_i=0;
- }
- will_dirty_next=will_dirty_i;
- wont_dirty_next=wont_dirty_i;
- for(r=0;r<HOST_REGS;r++) {
- if(r!=EXCLUDE_REG) {
- if((regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
- if((regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
- if((regs[i].regmap[r]&63)>33) will_dirty_i&=~(1<<r);
- if(regs[i].regmap[r]<=0) will_dirty_i&=~(1<<r);
- if(regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
- if((regs[i].regmap[r]&63)==rt1[i]) wont_dirty_i|=1<<r;
- if((regs[i].regmap[r]&63)==rt2[i]) wont_dirty_i|=1<<r;
- if(regs[i].regmap[r]==CCREG) wont_dirty_i|=1<<r;
- if(i>istart) {
- if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP&&itype[i]!=FJUMP)
- {
- // Don't store a register immediately after writing it,
- // may prevent dual-issue.
- if((regs[i].regmap[r]&63)==rt1[i-1]) wont_dirty_i|=1<<r;
- if((regs[i].regmap[r]&63)==rt2[i-1]) wont_dirty_i|=1<<r;
- }
- }
- }
- }
- // Save it
- will_dirty[i]=will_dirty_i;
- wont_dirty[i]=wont_dirty_i;
- // Mark registers that won't be dirtied as not dirty
- if(wr) {
- /*printf("wr (%d,%d) %x will:",istart,iend,start+i*4);
- for(r=0;r<HOST_REGS;r++) {
- if((will_dirty_i>>r)&1) {
- printf(" r%d",r);
- }
- }
- printf("\n");*/
-
- //if(i==istart||(itype[i-1]!=RJUMP&&itype[i-1]!=UJUMP&&itype[i-1]!=CJUMP&&itype[i-1]!=SJUMP&&itype[i-1]!=FJUMP)) {
- regs[i].dirty|=will_dirty_i;
- #ifndef DESTRUCTIVE_WRITEBACK
- regs[i].dirty&=wont_dirty_i;
- if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
- {
- if(i<iend-1&&itype[i]!=RJUMP&&itype[i]!=UJUMP&&(source[i]>>16)!=0x1000) {
- for(r=0;r<HOST_REGS;r++) {
- if(r!=EXCLUDE_REG) {
- if(regs[i].regmap[r]==regmap_pre[i+2][r]) {
- regs[i+2].wasdirty&=wont_dirty_i|~(1<<r);
- }else {/*printf("i: %x (%d) mismatch(+2): %d\n",start+i*4,i,r);/*assert(!((wont_dirty_i>>r)&1));*/}
- }
- }
- }
- }
- else
- {
- if(i<iend) {
- for(r=0;r<HOST_REGS;r++) {
- if(r!=EXCLUDE_REG) {
- if(regs[i].regmap[r]==regmap_pre[i+1][r]) {
- regs[i+1].wasdirty&=wont_dirty_i|~(1<<r);
- }else {/*printf("i: %x (%d) mismatch(+1): %d\n",start+i*4,i,r);/*assert(!((wont_dirty_i>>r)&1));*/}
- }
- }
- }
- }
- #endif
- //}
- }
- // Deal with changed mappings
- temp_will_dirty=will_dirty_i;
- temp_wont_dirty=wont_dirty_i;
- for(r=0;r<HOST_REGS;r++) {
- if(r!=EXCLUDE_REG) {
- int nr;
- if(regs[i].regmap[r]==regmap_pre[i][r]) {
- if(wr) {
- #ifndef DESTRUCTIVE_WRITEBACK
- regs[i].wasdirty&=wont_dirty_i|~(1<<r);
- #endif
- regs[i].wasdirty|=will_dirty_i&(1<<r);
- }
- }
- else if((nr=get_reg(regs[i].regmap,regmap_pre[i][r]))>=0) {
- // Register moved to a different register
- will_dirty_i&=~(1<<r);
- wont_dirty_i&=~(1<<r);
- will_dirty_i|=((temp_will_dirty>>nr)&1)<<r;
- wont_dirty_i|=((temp_wont_dirty>>nr)&1)<<r;
- if(wr) {
- #ifndef DESTRUCTIVE_WRITEBACK
- regs[i].wasdirty&=wont_dirty_i|~(1<<r);
- #endif
- regs[i].wasdirty|=will_dirty_i&(1<<r);
- }
- }
- else {
- will_dirty_i&=~(1<<r);
- wont_dirty_i&=~(1<<r);
- if((regmap_pre[i][r]&63)>0 && (regmap_pre[i][r]&63)<34) {
- will_dirty_i|=((unneeded_reg[i]>>(regmap_pre[i][r]&63))&1)<<r;
- wont_dirty_i|=((unneeded_reg[i]>>(regmap_pre[i][r]&63))&1)<<r;
- } else {
- wont_dirty_i|=1<<r;
- /*printf("i: %x (%d) mismatch: %d\n",start+i*4,i,r);/*assert(!((will_dirty>>r)&1));*/
- }
- }
- }
- }
- }
-}