- signed char *i_regmap=i_regs->regmap;
- int cc;
- int match;
- match=match_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
- assem_debug("fmatch=%d\n",match);
- int fs,cs;
- int eaddr;
- int invert=0;
- int internal=internal_branch(branch_regs[i].is32,ba[i]);
- if(i==(ba[i]-start)>>2) assem_debug("idle loop\n");
- if(!match) invert=1;
- #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
- if(i>(ba[i]-start)>>2) invert=1;
- #endif
-
- if(ooo[i]) {
- fs=get_reg(branch_regs[i].regmap,FSREG);
- address_generation(i+1,i_regs,regs[i].regmap_entry); // Is this okay?
- }
- else {
- fs=get_reg(i_regmap,FSREG);
- }
-
- // Check cop1 unusable
- if(!cop1_usable) {
- cs=get_reg(i_regmap,CSREG);
- assert(cs>=0);
- emit_testimm(cs,0x20000000);
- eaddr=(int)out;
- emit_jeq(0);
- add_stub(FP_STUB,eaddr,(int)out,i,cs,(int)i_regs,0,0);
- cop1_usable=1;
- }
-
- if(ooo[i]) {
- // Out of order execution (delay slot first)
- //printf("OOOE\n");
- ds_assemble(i+1,i_regs);
- int adj;
- uint64_t bc_unneeded=branch_regs[i].u;
- uint64_t bc_unneeded_upper=branch_regs[i].uu;
- bc_unneeded&=~((1LL<<rs1[i])|(1LL<<rs2[i]));
- bc_unneeded_upper&=~((1LL<<us1[i])|(1LL<<us2[i]));
- bc_unneeded|=1;
- bc_unneeded_upper|=1;
- wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,
- bc_unneeded,bc_unneeded_upper);
- load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i],rs1[i]);
- load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG);
- cc=get_reg(branch_regs[i].regmap,CCREG);
- assert(cc==HOST_CCREG);
- do_cc(i,branch_regs[i].regmap,&adj,-1,0,invert);
- assem_debug("cycle count (adj)\n");
- if(1) {
- int nottaken=0;
- if(adj&&!invert) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc);
- if(1) {
- assert(fs>=0);
- emit_testimm(fs,0x800000);
- if(source[i]&0x10000) // BC1T
- {
- if(invert){
- nottaken=(int)out;
- emit_jeq(1);
- }else{
- add_to_linker((int)out,ba[i],internal);
- emit_jne(0);
- }
- }
- else // BC1F
- if(invert){
- nottaken=(int)out;
- emit_jne(1);
- }else{
- add_to_linker((int)out,ba[i],internal);
- emit_jeq(0);
- }
- {
- }
- } // if(!only32)
-
- if(invert) {
- if(adj) emit_addimm(cc,-CLOCK_ADJUST(adj),cc);
- #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
- else if(match) emit_addnop(13);
- #endif
- store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
- load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
- if(internal)
- assem_debug("branch: internal\n");
- else
- assem_debug("branch: external\n");
- if(internal&&is_ds[(ba[i]-start)>>2]) {
- ds_assemble_entry(i);
- }
- else {
- add_to_linker((int)out,ba[i],internal);
- emit_jmp(0);
- }
- set_jump_target(nottaken,(int)out);
- }
-
- if(adj) {
- if(!invert) emit_addimm(cc,CLOCK_ADJUST(adj),cc);
- }
- } // (!unconditional)
- } // if(ooo)
- else
- {
- // In-order execution (branch first)
- //printf("IOE\n");
- int nottaken=0;
- if(1) {
- //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
- if(1) {
- assert(fs>=0);
- emit_testimm(fs,0x800000);
- if(source[i]&0x10000) // BC1T
- {
- nottaken=(int)out;
- emit_jeq(1);
- }
- else // BC1F
- {
- nottaken=(int)out;
- emit_jne(1);
- }
- }
- } // if(!unconditional)
- int adj;
- uint64_t ds_unneeded=branch_regs[i].u;
- uint64_t ds_unneeded_upper=branch_regs[i].uu;
- ds_unneeded&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
- ds_unneeded_upper&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
- if((~ds_unneeded_upper>>rt1[i+1])&1) ds_unneeded_upper&=~((1LL<<dep1[i+1])|(1LL<<dep2[i+1]));
- ds_unneeded|=1;
- ds_unneeded_upper|=1;
- // branch taken
- //assem_debug("1:\n");
- wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,
- ds_unneeded,ds_unneeded_upper);
- // load regs
- load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i+1],rs2[i+1]);
- address_generation(i+1,&branch_regs[i],0);
- load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,INVCP);
- ds_assemble(i+1,&branch_regs[i]);
- cc=get_reg(branch_regs[i].regmap,CCREG);
- if(cc==-1) {
- emit_loadreg(CCREG,cc=HOST_CCREG);
- // CHECK: Is the following instruction (fall thru) allocated ok?
- }
- assert(cc==HOST_CCREG);
- store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
- do_cc(i,i_regmap,&adj,ba[i],TAKEN,0);
- assem_debug("cycle count (adj)\n");
- if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc);
- load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
- if(internal)
- assem_debug("branch: internal\n");
- else
- assem_debug("branch: external\n");
- if(internal&&is_ds[(ba[i]-start)>>2]) {
- ds_assemble_entry(i);
- }
- else {
- add_to_linker((int)out,ba[i],internal);
- emit_jmp(0);
- }
-
- // branch not taken
- if(1) { // <- FIXME (don't need this)
- set_jump_target(nottaken,(int)out);
- assem_debug("1:\n");
- if(!likely[i]) {
- wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,
- ds_unneeded,ds_unneeded_upper);
- load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i+1],rs2[i+1]);
- address_generation(i+1,&branch_regs[i],0);
- load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG);
- ds_assemble(i+1,&branch_regs[i]);
- }
- cc=get_reg(branch_regs[i].regmap,CCREG);
- if(cc==-1&&!likely[i]) {
- // Cycle count isn't in a register, temporarily load it then write it out
- emit_loadreg(CCREG,HOST_CCREG);
- emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG);
- int jaddr=(int)out;
- emit_jns(0);
- add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0);
- emit_storereg(CCREG,HOST_CCREG);
- }
- else{
- cc=get_reg(i_regmap,CCREG);
- assert(cc==HOST_CCREG);
- emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc);
- int jaddr=(int)out;
- emit_jns(0);
- add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,likely[i]?NULLDS:NOTTAKEN,0);
- }
- }
- }
-}
-
-static void pagespan_assemble(int i,struct regstat *i_regs)
-{
- int s1l=get_reg(i_regs->regmap,rs1[i]);
- int s1h=get_reg(i_regs->regmap,rs1[i]|64);
- int s2l=get_reg(i_regs->regmap,rs2[i]);
- int s2h=get_reg(i_regs->regmap,rs2[i]|64);
- int taken=0;
- int nottaken=0;
- int unconditional=0;
- if(rs1[i]==0)
- {
- s1l=s2l;s1h=s2h;
- s2l=s2h=-1;
- }
- else if(rs2[i]==0)
- {
- s2l=s2h=-1;
- }
- if((i_regs->is32>>rs1[i])&(i_regs->is32>>rs2[i])&1) {
- s1h=s2h=-1;
- }
- int hr=0;
- int addr=-1,alt=-1,ntaddr=-1;
- if(i_regs->regmap[HOST_BTREG]<0) {addr=HOST_BTREG;}
- else {
- while(hr<HOST_REGS)
- {
- if(hr!=EXCLUDE_REG && hr!=HOST_CCREG &&
- (i_regs->regmap[hr]&63)!=rs1[i] &&
- (i_regs->regmap[hr]&63)!=rs2[i] )
- {
- addr=hr++;break;
- }
- hr++;
- }
- }
- while(hr<HOST_REGS)
- {
- if(hr!=EXCLUDE_REG && hr!=HOST_CCREG && hr!=HOST_BTREG &&
- (i_regs->regmap[hr]&63)!=rs1[i] &&
- (i_regs->regmap[hr]&63)!=rs2[i] )
- {
- alt=hr++;break;
- }
- hr++;
- }
- if((opcode[i]&0x2E)==6) // BLEZ/BGTZ needs another register
- {
- while(hr<HOST_REGS)
- {
- if(hr!=EXCLUDE_REG && hr!=HOST_CCREG && hr!=HOST_BTREG &&
- (i_regs->regmap[hr]&63)!=rs1[i] &&
- (i_regs->regmap[hr]&63)!=rs2[i] )
- {
- ntaddr=hr;break;
- }
- hr++;
- }
- }
- assert(hr<HOST_REGS);
- if((opcode[i]&0x2e)==4||opcode[i]==0x11) { // BEQ/BNE/BEQL/BNEL/BC1
- load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,CCREG,CCREG);
- }
- emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG);
- if(opcode[i]==2) // J
- {
- unconditional=1;
- }
- if(opcode[i]==3) // JAL
- {
- // TODO: mini_ht
- int rt=get_reg(i_regs->regmap,31);
- emit_movimm(start+i*4+8,rt);
- unconditional=1;
- }
- if(opcode[i]==0&&(opcode2[i]&0x3E)==8) // JR/JALR
- {
- emit_mov(s1l,addr);
- if(opcode2[i]==9) // JALR
- {
- int rt=get_reg(i_regs->regmap,rt1[i]);
- emit_movimm(start+i*4+8,rt);
- }
- }
- if((opcode[i]&0x3f)==4) // BEQ
- {
- if(rs1[i]==rs2[i])
- {
- unconditional=1;
- }
- else
- #ifdef HAVE_CMOV_IMM
- if(s1h<0) {
- if(s2l>=0) emit_cmp(s1l,s2l);
- else emit_test(s1l,s1l);
- emit_cmov2imm_e_ne_compact(ba[i],start+i*4+8,addr);
- }
- else
- #endif
- {
- assert(s1l>=0);
- emit_mov2imm_compact(ba[i],addr,start+i*4+8,alt);
- if(s1h>=0) {
- if(s2h>=0) emit_cmp(s1h,s2h);
- else emit_test(s1h,s1h);
- emit_cmovne_reg(alt,addr);
- }
- if(s2l>=0) emit_cmp(s1l,s2l);
- else emit_test(s1l,s1l);
- emit_cmovne_reg(alt,addr);
- }
- }
- if((opcode[i]&0x3f)==5) // BNE
- {
- #ifdef HAVE_CMOV_IMM
- if(s1h<0) {
- if(s2l>=0) emit_cmp(s1l,s2l);
- else emit_test(s1l,s1l);
- emit_cmov2imm_e_ne_compact(start+i*4+8,ba[i],addr);
- }
- else
- #endif
- {
- assert(s1l>=0);
- emit_mov2imm_compact(start+i*4+8,addr,ba[i],alt);
- if(s1h>=0) {
- if(s2h>=0) emit_cmp(s1h,s2h);
- else emit_test(s1h,s1h);
- emit_cmovne_reg(alt,addr);
- }
- if(s2l>=0) emit_cmp(s1l,s2l);
- else emit_test(s1l,s1l);
- emit_cmovne_reg(alt,addr);
- }
- }
- if((opcode[i]&0x3f)==0x14) // BEQL
- {
- if(s1h>=0) {
- if(s2h>=0) emit_cmp(s1h,s2h);
- else emit_test(s1h,s1h);
- nottaken=(int)out;
- emit_jne(0);
- }
- if(s2l>=0) emit_cmp(s1l,s2l);
- else emit_test(s1l,s1l);
- if(nottaken) set_jump_target(nottaken,(int)out);
- nottaken=(int)out;
- emit_jne(0);
- }
- if((opcode[i]&0x3f)==0x15) // BNEL
- {
- if(s1h>=0) {
- if(s2h>=0) emit_cmp(s1h,s2h);
- else emit_test(s1h,s1h);
- taken=(int)out;
- emit_jne(0);
- }
- if(s2l>=0) emit_cmp(s1l,s2l);
- else emit_test(s1l,s1l);
- nottaken=(int)out;
- emit_jeq(0);
- if(taken) set_jump_target(taken,(int)out);
- }
- if((opcode[i]&0x3f)==6) // BLEZ
- {
- emit_mov2imm_compact(ba[i],alt,start+i*4+8,addr);
- emit_cmpimm(s1l,1);
- if(s1h>=0) emit_mov(addr,ntaddr);
- emit_cmovl_reg(alt,addr);
- if(s1h>=0) {
- emit_test(s1h,s1h);
- emit_cmovne_reg(ntaddr,addr);
- emit_cmovs_reg(alt,addr);
- }
- }
- if((opcode[i]&0x3f)==7) // BGTZ
- {
- emit_mov2imm_compact(ba[i],addr,start+i*4+8,ntaddr);
- emit_cmpimm(s1l,1);
- if(s1h>=0) emit_mov(addr,alt);
- emit_cmovl_reg(ntaddr,addr);
- if(s1h>=0) {
- emit_test(s1h,s1h);
- emit_cmovne_reg(alt,addr);
- emit_cmovs_reg(ntaddr,addr);
- }
- }
- if((opcode[i]&0x3f)==0x16) // BLEZL
- {
- assert((opcode[i]&0x3f)!=0x16);
- }
- if((opcode[i]&0x3f)==0x17) // BGTZL
- {
- assert((opcode[i]&0x3f)!=0x17);
- }
- assert(opcode[i]!=1); // BLTZ/BGEZ
-
- //FIXME: Check CSREG
- if(opcode[i]==0x11 && opcode2[i]==0x08 ) {
- if((source[i]&0x30000)==0) // BC1F
- {
- emit_mov2imm_compact(ba[i],addr,start+i*4+8,alt);
- emit_testimm(s1l,0x800000);
- emit_cmovne_reg(alt,addr);
- }
- if((source[i]&0x30000)==0x10000) // BC1T
- {
- emit_mov2imm_compact(ba[i],alt,start+i*4+8,addr);
- emit_testimm(s1l,0x800000);
- emit_cmovne_reg(alt,addr);
- }
- if((source[i]&0x30000)==0x20000) // BC1FL
- {
- emit_testimm(s1l,0x800000);
- nottaken=(int)out;
- emit_jne(0);
- }
- if((source[i]&0x30000)==0x30000) // BC1TL
- {
- emit_testimm(s1l,0x800000);
- nottaken=(int)out;
- emit_jeq(0);
- }
- }
-
- assert(i_regs->regmap[HOST_CCREG]==CCREG);
- wb_dirtys(regs[i].regmap,regs[i].is32,regs[i].dirty);
- if(likely[i]||unconditional)
- {
- emit_movimm(ba[i],HOST_BTREG);
- }
- else if(addr!=HOST_BTREG)
- {
- emit_mov(addr,HOST_BTREG);
- }
- void *branch_addr=out;
- emit_jmp(0);
- int target_addr=start+i*4+5;
- void *stub=out;
- void *compiled_target_addr=check_addr(target_addr);
- emit_extjump_ds((int)branch_addr,target_addr);
- if(compiled_target_addr) {
- set_jump_target((int)branch_addr,(int)compiled_target_addr);
- add_link(target_addr,stub);
- }
- else set_jump_target((int)branch_addr,(int)stub);
- if(likely[i]) {
- // Not-taken path
- set_jump_target((int)nottaken,(int)out);
- wb_dirtys(regs[i].regmap,regs[i].is32,regs[i].dirty);
- void *branch_addr=out;
- emit_jmp(0);
- int target_addr=start+i*4+8;
- void *stub=out;
- void *compiled_target_addr=check_addr(target_addr);
- emit_extjump_ds((int)branch_addr,target_addr);
- if(compiled_target_addr) {
- set_jump_target((int)branch_addr,(int)compiled_target_addr);
- add_link(target_addr,stub);
- }
- else set_jump_target((int)branch_addr,(int)stub);
- }
-}
-
-// Assemble the delay slot for the above
-static void pagespan_ds()
-{
- assem_debug("initial delay slot:\n");
- u_int vaddr=start+1;
- u_int page=get_page(vaddr);
- u_int vpage=get_vpage(vaddr);
- ll_add(jump_dirty+vpage,vaddr,(void *)out);
- do_dirty_stub_ds();
- ll_add(jump_in+page,vaddr,(void *)out);
- assert(regs[0].regmap_entry[HOST_CCREG]==CCREG);
- if(regs[0].regmap[HOST_CCREG]!=CCREG)
- wb_register(CCREG,regs[0].regmap_entry,regs[0].wasdirty,regs[0].was32);
- if(regs[0].regmap[HOST_BTREG]!=BTREG)
- emit_writeword(HOST_BTREG,(int)&branch_target);
- load_regs(regs[0].regmap_entry,regs[0].regmap,regs[0].was32,rs1[0],rs2[0]);
- address_generation(0,&regs[0],regs[0].regmap_entry);
- if(itype[0]==STORE||itype[0]==STORELR||(opcode[0]&0x3b)==0x39||(opcode[0]&0x3b)==0x3a)
- load_regs(regs[0].regmap_entry,regs[0].regmap,regs[0].was32,INVCP,INVCP);
- cop1_usable=0;
- is_delayslot=0;
- switch(itype[0]) {
- case ALU:
- alu_assemble(0,&regs[0]);break;
- case IMM16:
- imm16_assemble(0,&regs[0]);break;
- case SHIFT:
- shift_assemble(0,&regs[0]);break;
- case SHIFTIMM:
- shiftimm_assemble(0,&regs[0]);break;
- case LOAD:
- load_assemble(0,&regs[0]);break;
- case LOADLR:
- loadlr_assemble(0,&regs[0]);break;
- case STORE:
- store_assemble(0,&regs[0]);break;
- case STORELR:
- storelr_assemble(0,&regs[0]);break;
- case COP0:
- cop0_assemble(0,&regs[0]);break;
- case COP1:
- cop1_assemble(0,&regs[0]);break;
- case C1LS:
- c1ls_assemble(0,&regs[0]);break;
- case COP2:
- cop2_assemble(0,&regs[0]);break;
- case C2LS:
- c2ls_assemble(0,&regs[0]);break;
- case C2OP:
- c2op_assemble(0,&regs[0]);break;
- case FCONV:
- fconv_assemble(0,&regs[0]);break;
- case FLOAT:
- float_assemble(0,&regs[0]);break;
- case FCOMP:
- fcomp_assemble(0,&regs[0]);break;
- case MULTDIV:
- multdiv_assemble(0,&regs[0]);break;
- case MOV:
- mov_assemble(0,&regs[0]);break;
- case SYSCALL:
- case HLECALL:
- case INTCALL:
- case SPAN:
- case UJUMP:
- case RJUMP:
- case CJUMP:
- case SJUMP:
- case FJUMP:
- SysPrintf("Jump in the delay slot. This is probably a bug.\n");
- }
- int btaddr=get_reg(regs[0].regmap,BTREG);
- if(btaddr<0) {
- btaddr=get_reg(regs[0].regmap,-1);
- emit_readword((int)&branch_target,btaddr);