- set_jump_target((int)branch_addr,(int)compiled_target_addr);
- add_link(target_addr,stub);
- }
- else set_jump_target((int)branch_addr,(int)stub);
- if(likely[i]) {
- // Not-taken path
- set_jump_target((int)nottaken,(int)out);
- wb_dirtys(regs[i].regmap,regs[i].is32,regs[i].dirty);
- void *branch_addr=out;
- emit_jmp(0);
- int target_addr=start+i*4+8;
- void *stub=out;
- void *compiled_target_addr=check_addr(target_addr);
- emit_extjump_ds((int)branch_addr,target_addr);
- if(compiled_target_addr) {
- set_jump_target((int)branch_addr,(int)compiled_target_addr);
- add_link(target_addr,stub);
- }
- else set_jump_target((int)branch_addr,(int)stub);
- }
-}
-
-// Assemble the delay slot for the above
-static void pagespan_ds()
-{
- assem_debug("initial delay slot:\n");
- u_int vaddr=start+1;
- u_int page=get_page(vaddr);
- u_int vpage=get_vpage(vaddr);
- ll_add(jump_dirty+vpage,vaddr,(void *)out);
- do_dirty_stub_ds();
- ll_add(jump_in+page,vaddr,(void *)out);
- assert(regs[0].regmap_entry[HOST_CCREG]==CCREG);
- if(regs[0].regmap[HOST_CCREG]!=CCREG)
- wb_register(CCREG,regs[0].regmap_entry,regs[0].wasdirty,regs[0].was32);
- if(regs[0].regmap[HOST_BTREG]!=BTREG)
- emit_writeword(HOST_BTREG,(int)&branch_target);
- load_regs(regs[0].regmap_entry,regs[0].regmap,regs[0].was32,rs1[0],rs2[0]);
- address_generation(0,®s[0],regs[0].regmap_entry);
- if(itype[0]==STORE||itype[0]==STORELR||(opcode[0]&0x3b)==0x39||(opcode[0]&0x3b)==0x3a)
- load_regs(regs[0].regmap_entry,regs[0].regmap,regs[0].was32,INVCP,INVCP);
- cop1_usable=0;
- is_delayslot=0;
- switch(itype[0]) {
- case ALU:
- alu_assemble(0,®s[0]);break;
- case IMM16:
- imm16_assemble(0,®s[0]);break;
- case SHIFT:
- shift_assemble(0,®s[0]);break;
- case SHIFTIMM:
- shiftimm_assemble(0,®s[0]);break;
- case LOAD:
- load_assemble(0,®s[0]);break;
- case LOADLR:
- loadlr_assemble(0,®s[0]);break;
- case STORE:
- store_assemble(0,®s[0]);break;
- case STORELR:
- storelr_assemble(0,®s[0]);break;
- case COP0:
- cop0_assemble(0,®s[0]);break;
- case COP1:
- cop1_assemble(0,®s[0]);break;
- case C1LS:
- c1ls_assemble(0,®s[0]);break;
- case COP2:
- cop2_assemble(0,®s[0]);break;
- case C2LS:
- c2ls_assemble(0,®s[0]);break;
- case C2OP:
- c2op_assemble(0,®s[0]);break;
- case FCONV:
- fconv_assemble(0,®s[0]);break;
- case FLOAT:
- float_assemble(0,®s[0]);break;
- case FCOMP:
- fcomp_assemble(0,®s[0]);break;
- case MULTDIV:
- multdiv_assemble(0,®s[0]);break;
- case MOV:
- mov_assemble(0,®s[0]);break;
- case SYSCALL:
- case HLECALL:
- case SPAN:
- case UJUMP:
- case RJUMP:
- case CJUMP:
- case SJUMP:
- case FJUMP:
- printf("Jump in the delay slot. This is probably a bug.\n");
- }
- int btaddr=get_reg(regs[0].regmap,BTREG);
- if(btaddr<0) {
- btaddr=get_reg(regs[0].regmap,-1);
- emit_readword((int)&branch_target,btaddr);
- }
- assert(btaddr!=HOST_CCREG);
- if(regs[0].regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
-#ifdef HOST_IMM8
- emit_movimm(start+4,HOST_TEMPREG);
- emit_cmp(btaddr,HOST_TEMPREG);
-#else
- emit_cmpimm(btaddr,start+4);
-#endif
- int branch=(int)out;
- emit_jeq(0);
- store_regs_bt(regs[0].regmap,regs[0].is32,regs[0].dirty,-1);
- emit_jmp(jump_vaddr_reg[btaddr]);
- set_jump_target(branch,(int)out);
- store_regs_bt(regs[0].regmap,regs[0].is32,regs[0].dirty,start+4);
- load_regs_bt(regs[0].regmap,regs[0].is32,regs[0].dirty,start+4);
-}
-
-// Basic liveness analysis for MIPS registers
-void unneeded_registers(int istart,int iend,int r)
-{
- int i;
- uint64_t u,uu,b,bu;
- uint64_t temp_u,temp_uu;
- uint64_t tdep;
- if(iend==slen-1) {
- u=1;uu=1;
- }else{
- u=unneeded_reg[iend+1];
- uu=unneeded_reg_upper[iend+1];
- u=1;uu=1;
- }
- for (i=iend;i>=istart;i--)
- {
- //printf("unneeded registers i=%d (%d,%d) r=%d\n",i,istart,iend,r);
- if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
- {
- // If subroutine call, flag return address as a possible branch target
- if(rt1[i]==31 && i<slen-2) bt[i+2]=1;
-
- if(ba[i]<start || ba[i]>=(start+slen*4))
- {
- // Branch out of this block, flush all regs
- u=1;
- uu=1;
- /* Hexagon hack
- if(itype[i]==UJUMP&&rt1[i]==31)
- {
- uu=u=0x300C00F; // Discard at, v0-v1, t6-t9
- }
- if(itype[i]==RJUMP&&rs1[i]==31)
- {
- uu=u=0x300C0F3; // Discard at, a0-a3, t6-t9
- }
- if(start>0x80000400&&start<0x80000000+RAM_SIZE) {
- if(itype[i]==UJUMP&&rt1[i]==31)
- {
- //uu=u=0x30300FF0FLL; // Discard at, v0-v1, t0-t9, lo, hi
- uu=u=0x300FF0F; // Discard at, v0-v1, t0-t9
- }
- if(itype[i]==RJUMP&&rs1[i]==31)
- {
- //uu=u=0x30300FFF3LL; // Discard at, a0-a3, t0-t9, lo, hi
- uu=u=0x300FFF3; // Discard at, a0-a3, t0-t9
- }
- }*/
- branch_unneeded_reg[i]=u;
- branch_unneeded_reg_upper[i]=uu;
- // Merge in delay slot
- tdep=(~uu>>rt1[i+1])&1;
- u|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
- uu|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
- u&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
- uu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
- uu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
- u|=1;uu|=1;
- // If branch is "likely" (and conditional)
- // then we skip the delay slot on the fall-thru path
- if(likely[i]) {
- if(i<slen-1) {
- u&=unneeded_reg[i+2];
- uu&=unneeded_reg_upper[i+2];
- }
- else
- {
- u=1;
- uu=1;
- }
- }
- }
- else
- {
- // Internal branch, flag target
- bt[(ba[i]-start)>>2]=1;
- if(ba[i]<=start+i*4) {
- // Backward branch
- if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000)
- {
- // Unconditional branch
- temp_u=1;temp_uu=1;
- } else {
- // Conditional branch (not taken case)
- temp_u=unneeded_reg[i+2];
- temp_uu=unneeded_reg_upper[i+2];
- }
- // Merge in delay slot
- tdep=(~temp_uu>>rt1[i+1])&1;
- temp_u|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
- temp_uu|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
- temp_u&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
- temp_uu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
- temp_uu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
- temp_u|=1;temp_uu|=1;
- // If branch is "likely" (and conditional)
- // then we skip the delay slot on the fall-thru path
- if(likely[i]) {
- if(i<slen-1) {
- temp_u&=unneeded_reg[i+2];
- temp_uu&=unneeded_reg_upper[i+2];
- }
- else
- {
- temp_u=1;
- temp_uu=1;
- }
- }
- tdep=(~temp_uu>>rt1[i])&1;
- temp_u|=(1LL<<rt1[i])|(1LL<<rt2[i]);
- temp_uu|=(1LL<<rt1[i])|(1LL<<rt2[i]);
- temp_u&=~((1LL<<rs1[i])|(1LL<<rs2[i]));
- temp_uu&=~((1LL<<us1[i])|(1LL<<us2[i]));
- temp_uu&=~((tdep<<dep1[i])|(tdep<<dep2[i]));
- temp_u|=1;temp_uu|=1;
- unneeded_reg[i]=temp_u;
- unneeded_reg_upper[i]=temp_uu;
- // Only go three levels deep. This recursion can take an
- // excessive amount of time if there are a lot of nested loops.
- if(r<2) {
- unneeded_registers((ba[i]-start)>>2,i-1,r+1);
- }else{
- unneeded_reg[(ba[i]-start)>>2]=1;
- unneeded_reg_upper[(ba[i]-start)>>2]=1;
- }
- } /*else*/ if(1) {
- if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000)
- {
- // Unconditional branch
- u=unneeded_reg[(ba[i]-start)>>2];
- uu=unneeded_reg_upper[(ba[i]-start)>>2];
- branch_unneeded_reg[i]=u;
- branch_unneeded_reg_upper[i]=uu;
- //u=1;
- //uu=1;
- //branch_unneeded_reg[i]=u;
- //branch_unneeded_reg_upper[i]=uu;
- // Merge in delay slot
- tdep=(~uu>>rt1[i+1])&1;
- u|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
- uu|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
- u&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
- uu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
- uu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
- u|=1;uu|=1;
- } else {
- // Conditional branch
- b=unneeded_reg[(ba[i]-start)>>2];
- bu=unneeded_reg_upper[(ba[i]-start)>>2];
- branch_unneeded_reg[i]=b;
- branch_unneeded_reg_upper[i]=bu;
- //b=1;
- //bu=1;
- //branch_unneeded_reg[i]=b;
- //branch_unneeded_reg_upper[i]=bu;
- // Branch delay slot
- tdep=(~uu>>rt1[i+1])&1;
- b|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
- bu|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
- b&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
- bu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
- bu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
- b|=1;bu|=1;
- // If branch is "likely" then we skip the
- // delay slot on the fall-thru path
- if(likely[i]) {
- u=b;
- uu=bu;
- if(i<slen-1) {
- u&=unneeded_reg[i+2];
- uu&=unneeded_reg_upper[i+2];
- //u=1;
- //uu=1;
- }
- } else {
- u&=b;
- uu&=bu;
- //u=1;
- //uu=1;
- }
- if(i<slen-1) {
- branch_unneeded_reg[i]&=unneeded_reg[i+2];
- branch_unneeded_reg_upper[i]&=unneeded_reg_upper[i+2];
- //branch_unneeded_reg[i]=1;
- //branch_unneeded_reg_upper[i]=1;
- } else {
- branch_unneeded_reg[i]=1;
- branch_unneeded_reg_upper[i]=1;
- }
- }
- }
- }
- }
- else if(itype[i]==SYSCALL||itype[i]==HLECALL)
- {
- // SYSCALL instruction (software interrupt)
- u=1;
- uu=1;
- }
- else if(itype[i]==COP0 && (source[i]&0x3f)==0x18)
- {
- // ERET instruction (return from interrupt)
- u=1;
- uu=1;
- }
- //u=uu=1; // DEBUG
- tdep=(~uu>>rt1[i])&1;
- // Written registers are unneeded
- u|=1LL<<rt1[i];
- u|=1LL<<rt2[i];
- uu|=1LL<<rt1[i];
- uu|=1LL<<rt2[i];
- // Accessed registers are needed
- u&=~(1LL<<rs1[i]);
- u&=~(1LL<<rs2[i]);
- uu&=~(1LL<<us1[i]);
- uu&=~(1LL<<us2[i]);
- // Source-target dependencies
- uu&=~(tdep<<dep1[i]);
- uu&=~(tdep<<dep2[i]);
- // R0 is always unneeded
- u|=1;uu|=1;
- // Save it
- unneeded_reg[i]=u;
- unneeded_reg_upper[i]=uu;
- /*
- printf("ur (%d,%d) %x: ",istart,iend,start+i*4);
- printf("U:");
- int r;
- for(r=1;r<=CCREG;r++) {
- if((unneeded_reg[i]>>r)&1) {
- if(r==HIREG) printf(" HI");
- else if(r==LOREG) printf(" LO");
- else printf(" r%d",r);
- }
- }
- printf(" UU:");
- for(r=1;r<=CCREG;r++) {
- if(((unneeded_reg_upper[i]&~unneeded_reg[i])>>r)&1) {
- if(r==HIREG) printf(" HI");
- else if(r==LOREG) printf(" LO");
- else printf(" r%d",r);
- }
- }
- printf("\n");*/