- if(itype[0]==STORE||itype[0]==STORELR||(opcode[0]&0x3b)==0x39||(opcode[0]&0x3b)==0x3a)
- load_regs(regs[0].regmap_entry,regs[0].regmap,regs[0].was32,INVCP,INVCP);
- cop1_usable=0;
- is_delayslot=0;
- switch(itype[0]) {
- case ALU:
- alu_assemble(0,®s[0]);break;
- case IMM16:
- imm16_assemble(0,®s[0]);break;
- case SHIFT:
- shift_assemble(0,®s[0]);break;
- case SHIFTIMM:
- shiftimm_assemble(0,®s[0]);break;
- case LOAD:
- load_assemble(0,®s[0]);break;
- case LOADLR:
- loadlr_assemble(0,®s[0]);break;
- case STORE:
- store_assemble(0,®s[0]);break;
- case STORELR:
- storelr_assemble(0,®s[0]);break;
- case COP0:
- cop0_assemble(0,®s[0]);break;
- case COP1:
- cop1_assemble(0,®s[0]);break;
- case C1LS:
- c1ls_assemble(0,®s[0]);break;
- case COP2:
- cop2_assemble(0,®s[0]);break;
- case C2LS:
- c2ls_assemble(0,®s[0]);break;
- case C2OP:
- c2op_assemble(0,®s[0]);break;
- case FCONV:
- fconv_assemble(0,®s[0]);break;
- case FLOAT:
- float_assemble(0,®s[0]);break;
- case FCOMP:
- fcomp_assemble(0,®s[0]);break;
- case MULTDIV:
- multdiv_assemble(0,®s[0]);break;
- case MOV:
- mov_assemble(0,®s[0]);break;
- case SYSCALL:
- case HLECALL:
- case INTCALL:
- case SPAN:
- case UJUMP:
- case RJUMP:
- case CJUMP:
- case SJUMP:
- case FJUMP:
- printf("Jump in the delay slot. This is probably a bug.\n");
- }
- int btaddr=get_reg(regs[0].regmap,BTREG);
- if(btaddr<0) {
- btaddr=get_reg(regs[0].regmap,-1);
- emit_readword((int)&branch_target,btaddr);
- }
- assert(btaddr!=HOST_CCREG);
- if(regs[0].regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
-#ifdef HOST_IMM8
- emit_movimm(start+4,HOST_TEMPREG);
- emit_cmp(btaddr,HOST_TEMPREG);
-#else
- emit_cmpimm(btaddr,start+4);
-#endif
- int branch=(int)out;
- emit_jeq(0);
- store_regs_bt(regs[0].regmap,regs[0].is32,regs[0].dirty,-1);
- emit_jmp(jump_vaddr_reg[btaddr]);
- set_jump_target(branch,(int)out);
- store_regs_bt(regs[0].regmap,regs[0].is32,regs[0].dirty,start+4);
- load_regs_bt(regs[0].regmap,regs[0].is32,regs[0].dirty,start+4);
-}
-
- /* Basic liveness analysis for MIPS registers.
-  *
-  * Walks the instructions iend..istart backwards and records, for each
-  * index i, bitmasks of registers whose current value is not needed by
-  * the code that follows (bit n corresponds to register n, bit 0 / r0 is
-  * always set):
-  *   unneeded_reg[i]        mask driven by rs1/rs2 (reads) and rt1/rt2 (writes)
-  *   unneeded_reg_upper[i]  mask driven by us1/us2 and dep1/dep2; presumably
-  *                          the upper halves of 64-bit registers - TODO confirm
-  *   gte_unneeded[i]        mask driven by gte_rs/gte_rt
-  * For branch instructions it additionally fills branch_unneeded_reg[i] and
-  * branch_unneeded_reg_upper[i], and flags branch targets in bt[].
-  *
-  * istart, iend: inclusive range of instruction indices to analyse.
-  * r: current recursion depth; a backward branch triggers a recursive
-  *    re-analysis of the range it loops over only while r<2.
-  */
- void unneeded_registers(int istart,int iend,int r)
- {
- int i;
- uint64_t u,uu,gte_u,b,bu,gte_bu; // u/uu/gte_u: running masks; b/bu/gte_bu: masks for the branch-taken path
- uint64_t temp_u,temp_uu,temp_gte_u=0; // scratch masks used while handling backward branches
- uint64_t tdep; // 1 when the written register's bit in the upper mask is clear (still needed)
- uint64_t gte_u_unknown=0; // gte mask used wherever nothing is known; default 0 = nothing unneeded
- if(new_dynarec_hacks&NDHACK_GTE_UNNEEDED)
- gte_u_unknown=~0ll; // with this hack flag set, the "unknown" gte mask is all ones instead
- if(iend==slen-1) {
- u=1;uu=1; // range ends at the last instruction: only bit 0 (r0) is unneeded
- gte_u=gte_u_unknown;
- }else{
- u=unneeded_reg[iend+1];
- uu=unneeded_reg_upper[iend+1];
- u=1;uu=1; // NOTE(review): overwrites the two loads above, making them dead stores - confirm this is intentional
- gte_u=gte_unneeded[iend+1];
- }
-
- for (i=iend;i>=istart;i--)
- {
- //printf("unneeded registers i=%d (%d,%d) r=%d\n",i,istart,iend,r);
- if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
- {
- // If subroutine call (writes r31), flag the return address (index i+2) as a possible branch target
- if(rt1[i]==31 && i<slen-2) bt[i+2]=1;
-
- if(ba[i]<start || ba[i]>=(start+slen*4))
- {
- // Branch out of this block, flush all regs (only r0 stays marked unneeded)
- u=1;
- uu=1;
- gte_u=gte_u_unknown;
- /* Hexagon hack
- if(itype[i]==UJUMP&&rt1[i]==31)
- {
- uu=u=0x300C00F; // Discard at, v0-v1, t6-t9
- }
- if(itype[i]==RJUMP&&rs1[i]==31)
- {
- uu=u=0x300C0F3; // Discard at, a0-a3, t6-t9
- }
- if(start>0x80000400&&start<0x80000000+RAM_SIZE) {
- if(itype[i]==UJUMP&&rt1[i]==31)
- {
- //uu=u=0x30300FF0FLL; // Discard at, v0-v1, t0-t9, lo, hi
- uu=u=0x300FF0F; // Discard at, v0-v1, t0-t9
- }
- if(itype[i]==RJUMP&&rs1[i]==31)
- {
- //uu=u=0x30300FFF3LL; // Discard at, a0-a3, t0-t9, lo, hi
- uu=u=0x300FFF3; // Discard at, a0-a3, t0-t9
- }
- }*/
- branch_unneeded_reg[i]=u; // masks recorded for the branch itself, before the delay slot is merged
- branch_unneeded_reg_upper[i]=uu;
- // Merge in delay slot (instruction i+1): its targets become unneeded, its sources needed
- tdep=(~uu>>rt1[i+1])&1;
- u|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
- uu|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
- u&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
- uu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
- uu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
- u|=1;uu|=1;
- gte_u|=gte_rt[i+1];
- gte_u&=~gte_rs[i+1];
- // If branch is "likely" (and conditional)
- // then we skip the delay slot on the fall-thru path
- if(likely[i]) {
- if(i<slen-1) { // NOTE(review): guard is i<slen-1 but index i+2 is read; for i==slen-2 that is index slen - confirm
- u&=unneeded_reg[i+2];
- uu&=unneeded_reg_upper[i+2];
- gte_u&=gte_unneeded[i+2];
- }
- else
- {
- u=1;
- uu=1;
- gte_u=gte_u_unknown;
- }
- }
- }
- else
- {
- // Internal branch, flag target (target index is (ba[i]-start)>>2)
- bt[(ba[i]-start)>>2]=1;
- if(ba[i]<=start+i*4) {
- // Backward branch (target at or before this instruction)
- if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000)
- {
- // Unconditional branch (an upper halfword of 0x1000 is treated as unconditional too)
- temp_u=1;temp_uu=1;
- temp_gte_u=0;
- } else {
- // Conditional branch (not taken case)
- temp_u=unneeded_reg[i+2];
- temp_uu=unneeded_reg_upper[i+2];
- temp_gte_u&=gte_unneeded[i+2]; // NOTE(review): '&=' keeps temp_gte_u's previous value (0 at declaration) while temp_u/temp_uu use '=' - confirm
- }
- // Merge in delay slot (instruction i+1) into the scratch masks
- tdep=(~temp_uu>>rt1[i+1])&1;
- temp_u|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
- temp_uu|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
- temp_u&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
- temp_uu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
- temp_uu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
- temp_u|=1;temp_uu|=1;
- temp_gte_u|=gte_rt[i+1];
- temp_gte_u&=~gte_rs[i+1];
- // If branch is "likely" (and conditional)
- // then we skip the delay slot on the fall-thru path
- if(likely[i]) {
- if(i<slen-1) { // NOTE(review): same i<slen-1 guard with an i+2 read as above - confirm
- temp_u&=unneeded_reg[i+2];
- temp_uu&=unneeded_reg_upper[i+2];
- temp_gte_u&=gte_unneeded[i+2];
- }
- else
- {
- temp_u=1;
- temp_uu=1;
- temp_gte_u=gte_u_unknown;
- }
- }
- tdep=(~temp_uu>>rt1[i])&1; // now apply the branch instruction's own writes and reads
- temp_u|=(1LL<<rt1[i])|(1LL<<rt2[i]);
- temp_uu|=(1LL<<rt1[i])|(1LL<<rt2[i]);
- temp_u&=~((1LL<<rs1[i])|(1LL<<rs2[i]));
- temp_uu&=~((1LL<<us1[i])|(1LL<<us2[i]));
- temp_uu&=~((tdep<<dep1[i])|(tdep<<dep2[i]));
- temp_u|=1;temp_uu|=1;
- temp_gte_u|=gte_rt[i];
- temp_gte_u&=~gte_rs[i];
- unneeded_reg[i]=temp_u; // provisional result for i, stored before the range target..i-1 is re-analysed
- unneeded_reg_upper[i]=temp_uu;
- gte_unneeded[i]=temp_gte_u;
- // Only go three levels deep. This recursion can take an
- // excessive amount of time if there are a lot of nested loops.
- if(r<2) {
- unneeded_registers((ba[i]-start)>>2,i-1,r+1); // re-analyse from the branch target up to the instruction before the branch
- }else{
- unneeded_reg[(ba[i]-start)>>2]=1; // depth limit reached: mark the target as needing everything except r0
- unneeded_reg_upper[(ba[i]-start)>>2]=1;
- gte_unneeded[(ba[i]-start)>>2]=gte_u_unknown;
- }
- } /*else*/ if(1) { // always executed: forward and backward internal branches both continue here
- if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000)
- {
- // Unconditional branch: continue from the masks recorded at the target
- u=unneeded_reg[(ba[i]-start)>>2];
- uu=unneeded_reg_upper[(ba[i]-start)>>2];
- gte_u=gte_unneeded[(ba[i]-start)>>2];
- branch_unneeded_reg[i]=u;
- branch_unneeded_reg_upper[i]=uu;
- //u=1;
- //uu=1;
- //branch_unneeded_reg[i]=u;
- //branch_unneeded_reg_upper[i]=uu;
- // Merge in delay slot (instruction i+1)
- tdep=(~uu>>rt1[i+1])&1;
- u|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
- uu|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
- u&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
- uu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
- uu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
- u|=1;uu|=1;
- gte_u|=gte_rt[i+1];
- gte_u&=~gte_rs[i+1];
- } else {
- // Conditional branch: b/bu/gte_bu hold the masks recorded at the target (taken path)
- b=unneeded_reg[(ba[i]-start)>>2];
- bu=unneeded_reg_upper[(ba[i]-start)>>2];
- gte_bu=gte_unneeded[(ba[i]-start)>>2];
- branch_unneeded_reg[i]=b;
- branch_unneeded_reg_upper[i]=bu;
- //b=1;
- //bu=1;
- //branch_unneeded_reg[i]=b;
- //branch_unneeded_reg_upper[i]=bu;
- // Branch delay slot
- tdep=(~uu>>rt1[i+1])&1; // NOTE(review): derived from uu but applied to bu below; the other delay-slot merges derive tdep from the mask they update - confirm
- b|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
- bu|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
- b&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
- bu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
- bu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
- b|=1;bu|=1;
- gte_bu|=gte_rt[i+1];
- gte_bu&=~gte_rs[i+1];
- // If branch is "likely" then we skip the
- // delay slot on the fall-thru path
- if(likely[i]) {
- u=b;
- uu=bu;
- gte_u=gte_bu;
- if(i<slen-1) { // NOTE(review): i+2 is read under an i<slen-1 guard - confirm
- u&=unneeded_reg[i+2];
- uu&=unneeded_reg_upper[i+2];
- gte_u&=gte_unneeded[i+2];
- //u=1;
- //uu=1;
- }
- } else {
- u&=b; // a register is unneeded only if unneeded on both the fall-through and the taken path
- uu&=bu;
- gte_u&=gte_bu;
- //u=1;
- //uu=1;
- }
- if(i<slen-1) { // NOTE(review): i+2 is read under an i<slen-1 guard - confirm
- branch_unneeded_reg[i]&=unneeded_reg[i+2]; // restrict the branch masks by what the code after the delay slot needs
- branch_unneeded_reg_upper[i]&=unneeded_reg_upper[i+2];
- //branch_unneeded_reg[i]=1;
- //branch_unneeded_reg_upper[i]=1;
- } else {
- branch_unneeded_reg[i]=1;
- branch_unneeded_reg_upper[i]=1;
- }
- }
- }
- }
- }
- else if(itype[i]==SYSCALL||itype[i]==HLECALL||itype[i]==INTCALL)
- {
- // SYSCALL instruction (software interrupt)
- u=1; // everything except r0 is treated as needed; gte_u is left unchanged here
- uu=1;
- }
- else if(itype[i]==COP0 && (source[i]&0x3f)==0x18)
- {
- // ERET instruction (return from interrupt)
- u=1; // everything except r0 is treated as needed; gte_u is left unchanged here
- uu=1;
- }
- //u=uu=1; // DEBUG
- tdep=(~uu>>rt1[i])&1; // sampled before rt1's bit is set in uu below
- // Written registers are unneeded
- u|=1LL<<rt1[i];
- u|=1LL<<rt2[i];
- uu|=1LL<<rt1[i];
- uu|=1LL<<rt2[i];
- gte_u|=gte_rt[i];
- // Accessed registers are needed
- u&=~(1LL<<rs1[i]);
- u&=~(1LL<<rs2[i]);
- uu&=~(1LL<<us1[i]);
- uu&=~(1LL<<us2[i]);
- gte_u&=~gte_rs[i];
- if(gte_rs[i]&&rt1[i]&&(unneeded_reg[i+1]&(1ll<<rt1[i]))) // NOTE(review): reads unneeded_reg[i+1]; for i==slen-1 that is index slen - confirm
- gte_u|=gte_rs[i]; // MFC2/CFC2 to dead register, unneeded
- // Source-target dependencies: if the target's upper bit was still needed (tdep), dep1/dep2 are needed too
- uu&=~(tdep<<dep1[i]);
- uu&=~(tdep<<dep2[i]);
- // R0 is always unneeded
- u|=1;uu|=1;
- // Save it
- unneeded_reg[i]=u;
- unneeded_reg_upper[i]=uu;
- gte_unneeded[i]=gte_u;
- /*
- printf("ur (%d,%d) %x: ",istart,iend,start+i*4);
- printf("U:");
- int r;
- for(r=1;r<=CCREG;r++) {
- if((unneeded_reg[i]>>r)&1) {
- if(r==HIREG) printf(" HI");
- else if(r==LOREG) printf(" LO");
- else printf(" r%d",r);
- }
- }
- printf(" UU:");
- for(r=1;r<=CCREG;r++) {
- if(((unneeded_reg_upper[i]&~unneeded_reg[i])>>r)&1) {
- if(r==HIREG) printf(" HI");
- else if(r==LOREG) printf(" LO");
- else printf(" r%d",r);
- }
- }
- printf("\n");*/
- }
-#ifdef FORCE32
- for (i=iend;i>=istart;i--)
- {
- unneeded_reg_upper[i]=branch_unneeded_reg_upper[i]=-1LL; // FORCE32 build: mark every upper-mask bit as unneeded for the whole range
- }
-#endif
-}
-
-// Identify registers which are likely to contain 32-bit values
-// This is used to predict whether any branches will jump to a
-// location with 64-bit values in registers.
-static void provisional_32bit()
-{
- int i,j;
- uint64_t is32=1;
- uint64_t lastbranch=1;
-
- for(i=0;i<slen;i++)
- {
- if(i>0) {
- if(itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP) {
- if(i>1) is32=lastbranch;
- else is32=1;
- }
- }
- if(i>1)
- {
- if(itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP) {
- if(likely[i-2]) {
- if(i>2) is32=lastbranch;
- else is32=1;
- }
- }
- if((opcode[i-2]&0x2f)==0x05) // BNE/BNEL
- {
- if(rs1[i-2]==0||rs2[i-2]==0)
- {
- if(rs1[i-2]) {
- is32|=1LL<<rs1[i-2];
- }
- if(rs2[i-2]) {
- is32|=1LL<<rs2[i-2];
- }
- }
- }
- }
- // If something jumps here with 64-bit values
- // then promote those registers to 64 bits
- if(bt[i])
- {
- uint64_t temp_is32=is32;
- for(j=i-1;j>=0;j--)
- {
- if(ba[j]==start+i*4)
- //temp_is32&=branch_regs[j].is32;
- temp_is32&=p32[j];
- }
- for(j=i;j<slen;j++)
- {
- if(ba[j]==start+i*4)
- temp_is32=1;
- }
- is32=temp_is32;
- }
- int type=itype[i];
- int op=opcode[i];
- int op2=opcode2[i];
- int rt=rt1[i];
- int s1=rs1[i];
- int s2=rs2[i];
- if(type==UJUMP||type==RJUMP||type==CJUMP||type==SJUMP||type==FJUMP) {
- // Branches don't write registers, consider the delay slot instead.
- type=itype[i+1];
- op=opcode[i+1];
- op2=opcode2[i+1];
- rt=rt1[i+1];
- s1=rs1[i+1];
- s2=rs2[i+1];
- lastbranch=is32;
- }
- switch(type) {
- case LOAD:
- if(opcode[i]==0x27||opcode[i]==0x37|| // LWU/LD
- opcode[i]==0x1A||opcode[i]==0x1B) // LDL/LDR
- is32&=~(1LL<<rt);
- else
- is32|=1LL<<rt;
- break;
- case STORE:
- case STORELR:
- break;
- case LOADLR:
- if(op==0x1a||op==0x1b) is32&=~(1LL<<rt); // LDR/LDL
- if(op==0x22) is32|=1LL<<rt; // LWL
- break;
- case IMM16:
- if (op==0x08||op==0x09|| // ADDI/ADDIU
- op==0x0a||op==0x0b|| // SLTI/SLTIU
- op==0x0c|| // ANDI
- op==0x0f) // LUI
- {
- is32|=1LL<<rt;
- }
- if(op==0x18||op==0x19) { // DADDI/DADDIU
- is32&=~(1LL<<rt);
- //if(imm[i]==0)
- // is32|=((is32>>s1)&1LL)<<rt;
- }
- if(op==0x0d||op==0x0e) { // ORI/XORI
- uint64_t sr=((is32>>s1)&1LL);
- is32&=~(1LL<<rt);
- is32|=sr<<rt;
- }
- break;
- case UJUMP:
- break;
- case RJUMP:
- break;
- case CJUMP:
- break;
- case SJUMP:
- break;
- case FJUMP:
- break;
- case ALU:
- if(op2>=0x20&&op2<=0x23) { // ADD/ADDU/SUB/SUBU
- is32|=1LL<<rt;
- }
- if(op2==0x2a||op2==0x2b) { // SLT/SLTU
- is32|=1LL<<rt;
- }
- else if(op2>=0x24&&op2<=0x27) { // AND/OR/XOR/NOR
- uint64_t sr=((is32>>s1)&(is32>>s2)&1LL);
- is32&=~(1LL<<rt);
- is32|=sr<<rt;
- }
- else if(op2>=0x2c&&op2<=0x2d) { // DADD/DADDU
- if(s1==0&&s2==0) {
- is32|=1LL<<rt;
- }
- else if(s2==0) {
- uint64_t sr=((is32>>s1)&1LL);
- is32&=~(1LL<<rt);
- is32|=sr<<rt;
- }
- else if(s1==0) {
- uint64_t sr=((is32>>s2)&1LL);
- is32&=~(1LL<<rt);
- is32|=sr<<rt;
- }
- else {
- is32&=~(1LL<<rt);
- }
- }
- else if(op2>=0x2e&&op2<=0x2f) { // DSUB/DSUBU
- if(s1==0&&s2==0) {
- is32|=1LL<<rt;
- }
- else if(s2==0) {
- uint64_t sr=((is32>>s1)&1LL);
- is32&=~(1LL<<rt);
- is32|=sr<<rt;
- }
- else {
- is32&=~(1LL<<rt);
- }
- }
- break;
- case MULTDIV:
- if (op2>=0x1c&&op2<=0x1f) { // DMULT/DMULTU/DDIV/DDIVU
- is32&=~((1LL<<HIREG)|(1LL<<LOREG));
- }
- else {
- is32|=(1LL<<HIREG)|(1LL<<LOREG);
- }
- break;
- case MOV:
- {
- uint64_t sr=((is32>>s1)&1LL);
- is32&=~(1LL<<rt);
- is32|=sr<<rt;
- }
- break;
- case SHIFT:
- if(op2>=0x14&&op2<=0x17) is32&=~(1LL<<rt); // DSLLV/DSRLV/DSRAV
- else is32|=1LL<<rt; // SLLV/SRLV/SRAV
- break;
- case SHIFTIMM:
- is32|=1LL<<rt;
- // DSLL/DSRL/DSRA/DSLL32/DSRL32 but not DSRA32 have 64-bit result
- if(op2>=0x38&&op2<0x3f) is32&=~(1LL<<rt);
- break;
- case COP0:
- if(op2==0) is32|=1LL<<rt; // MFC0
- break;
- case COP1:
- case COP2:
- if(op2==0) is32|=1LL<<rt; // MFC1
- if(op2==1) is32&=~(1LL<<rt); // DMFC1
- if(op2==2) is32|=1LL<<rt; // CFC1
- break;
- case C1LS:
- case C2LS:
- break;
- case FLOAT:
- case FCONV:
- break;
- case FCOMP:
- break;
- case C2OP:
- case SYSCALL:
- case HLECALL:
- break;
- default:
- break;
- }
- is32|=1;
- p32[i]=is32;
-
- if(i>0)
- {
- if(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000)
- {
- if(rt1[i-1]==31) // JAL/JALR
- {
- // Subroutine call will return here, don't alloc any registers
- is32=1;
- }
- else if(i+1<slen)
- {
- // Internal branch will jump here, match registers to caller
- is32=0x3FFFFFFFFLL;
- }
- }
- }