#define COP2 27 // Coprocessor 2 move
#define C2LS 28 // Coprocessor 2 load/store
#define C2OP 29 // Coprocessor 2 operation
+#define INTCALL 30// Call interpreter to handle rare corner cases
/* stubs */
#define CC_STUB 1
void jump_syscall_hle();
void jump_eret();
void jump_hlecall(); // used as an emit_jmp() target by the generated code
+void jump_intcall(); // emit_jmp() target used by intcall_assemble() for INTCALL instructions
void new_dyna_leave();
// TLB
static u_int get_page(u_int vaddr)
{
+#ifndef PCSX
u_int page=(vaddr^0x80000000)>>12;
+#else
+ u_int page=vaddr&~0xe0000000;
+ if (page < 0x1000000)
+ page &= ~0x0e00000; // RAM mirrors
+ page>>=12;
+#endif
#ifndef DISABLE_TLB
if(page>262143&&tlb_LUT_r[vaddr>>12]) page=(tlb_LUT_r[vaddr>>12]^0x80000000)>>12;
#endif
j++;
break;
}
- if(itype[i+j]==SYSCALL||itype[i+j]==HLECALL||((source[i+j]&0xfc00003f)==0x0d))
+ if(itype[i+j]==SYSCALL||itype[i+j]==HLECALL||itype[i+j]==INTCALL||((source[i+j]&0xfc00003f)==0x0d))
{
break;
}
if(((ptr>>shift)==(addr>>shift)) ||
(((ptr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift)))
{
- printf("EXP: Kill pointer at %x (%x)\n",(int)head->addr,head->vaddr);
+ inv_debug("EXP: Kill pointer at %x (%x)\n",(int)head->addr,head->vaddr);
u_int host_addr=(u_int)kill_pointer(head->addr);
if((host_addr>>12)!=(old_host_addr>>12)) {
alloc_reg(current,i,CSREG); // Load status
if(opcode2[i]<3) // MFC1/DMFC1/CFC1
{
- assert(rt1[i]);
- clear_const(current,rt1[i]);
- if(opcode2[i]==1) {
- alloc_reg64(current,i,rt1[i]); // DMFC1
- current->is32&=~(1LL<<rt1[i]);
- }else{
- alloc_reg(current,i,rt1[i]); // MFC1/CFC1
- current->is32|=1LL<<rt1[i];
+ if(rt1[i]){
+ clear_const(current,rt1[i]);
+ if(opcode2[i]==1) {
+ alloc_reg64(current,i,rt1[i]); // DMFC1
+ current->is32&=~(1LL<<rt1[i]);
+ }else{
+ alloc_reg(current,i,rt1[i]); // MFC1/CFC1
+ current->is32|=1LL<<rt1[i];
+ }
+ dirty_reg(current,rt1[i]);
}
- dirty_reg(current,rt1[i]);
alloc_reg_temp(current,i,-1);
}
else if(opcode2[i]>3) // MTC1/DMTC1/CTC1
emit_jmp((int)jump_hlecall);
}
+void intcall_assemble(int i,struct regstat *i_regs) // Emit the INTCALL sequence for instruction i: load its PC, update the cycle count, jump to jump_intcall
+{
+ signed char ccreg=get_reg(i_regs->regmap,CCREG); // host register currently mapped to CCREG in this instruction's regmap
+ assert(ccreg==HOST_CCREG); // the code emitted below addresses HOST_CCREG directly
+ assert(!is_delayslot); // this path is not expected to be assembled as a delay-slot instruction
+ emit_movimm(start+i*4,0); // Get PC: address of instruction i into host register 0 (presumably where jump_intcall expects it -- confirm)
+ emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // presumably adds CLOCK_DIVIDER*ccadj[i] to the cycle count in HOST_CCREG -- confirm operand order
+ emit_jmp((int)jump_intcall); // leave generated code via the jump_intcall stub (INTCALL: "call interpreter")
+}
+
void ds_assemble(int i,struct regstat *i_regs)
{
is_delayslot=1;
mov_assemble(i,i_regs);break;
case SYSCALL:
case HLECALL:
+ case INTCALL:
case SPAN:
case UJUMP:
case RJUMP:
mov_assemble(t,®s[t]);break;
case SYSCALL:
case HLECALL:
+ case INTCALL:
case SPAN:
case UJUMP:
case RJUMP:
//if(opcode2[i]>=0x10) return; // FIXME (BxxZAL)
//assert(opcode2[i]<0x10||rs1[i]==0); // FIXME (BxxZAL)
- if(ooo)
+ if(ooo) {
if(rs1[i]&&(rs1[i]==rt1[i+1]||rs1[i]==rt2[i+1]))
- {
- // Write-after-read dependency prevents out of order execution
- // First test branch condition, then execute delay slot, then branch
- ooo=0;
+ {
+ // Write-after-read dependency prevents out of order execution
+ // First test branch condition, then execute delay slot, then branch
+ ooo=0;
+ }
+ if(rt1[i]==31&&(rs1[i+1]==31||rs2[i+1]==31||rt1[i+1]==31||rt2[i+1]==31))
+ // BxxZAL $ra is available to delay insn, so do it in order
+ ooo=0;
}
- assert(opcode2[i]<0x10||ooo); // FIXME (BxxZALL)
if(ooo) {
s1l=get_reg(branch_regs[i].regmap,rs1[i]);
load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG);
if(rt1[i]==31) {
int rt,return_address;
- assert(rt1[i+1]!=31);
- assert(rt2[i+1]!=31);
rt=get_reg(branch_regs[i].regmap,31);
assem_debug("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
if(rt>=0) {
// In-order execution (branch first)
//printf("IOE\n");
int nottaken=0;
+ if(rt1[i]==31) {
+ int rt,return_address;
+ rt=get_reg(branch_regs[i].regmap,31);
+ if(rt>=0) {
+ // Save the PC even if the branch is not taken
+ return_address=start+i*4+8;
+ emit_movimm(return_address,rt); // PC into link register
+ #ifdef IMM_PREFETCH
+ emit_prefetch(hash_table[((return_address>>16)^return_address)&0xFFFF]);
+ #endif
+ }
+ }
if(!unconditional) {
//printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
if(!only32)
{
assert(s1h>=0);
- if((opcode2[i]&0x1d)==0) // BLTZ/BLTZL
+ if((opcode2[i]&0x0d)==0) // BLTZ/BLTZL/BLTZAL/BLTZALL
{
emit_test(s1h,s1h);
nottaken=(int)out;
emit_jns(1);
}
- if((opcode2[i]&0x1d)==1) // BGEZ/BGEZL
+ if((opcode2[i]&0x0d)==1) // BGEZ/BGEZL/BGEZAL/BGEZALL
{
emit_test(s1h,s1h);
nottaken=(int)out;
else
{
assert(s1l>=0);
- if((opcode2[i]&0x1d)==0) // BLTZ/BLTZL
+ if((opcode2[i]&0x0d)==0) // BLTZ/BLTZL/BLTZAL/BLTZALL
{
emit_test(s1l,s1l);
nottaken=(int)out;
emit_jns(1);
}
- if((opcode2[i]&0x1d)==1) // BGEZ/BGEZL
+ if((opcode2[i]&0x0d)==1) // BGEZ/BGEZL/BGEZAL/BGEZALL
{
emit_test(s1l,s1l);
nottaken=(int)out;
mov_assemble(0,®s[0]);break;
case SYSCALL:
case HLECALL:
+ case INTCALL:
case SPAN:
case UJUMP:
case RJUMP:
}
}
}
- else if(itype[i]==SYSCALL||itype[i]==HLECALL)
+ else if(itype[i]==SYSCALL||itype[i]==HLECALL||itype[i]==INTCALL)
{
// SYSCALL instruction (software interrupt)
u=1;
if((regs[i].was32>>dep2[i+1])&1) r32|=1LL<<dep2[i+1];
}
}
- else if(itype[i]==SYSCALL||itype[i]==HLECALL)
+ else if(itype[i]==SYSCALL||itype[i]==HLECALL||itype[i]==INTCALL)
{
// SYSCALL instruction (software interrupt)
r32=0;
}
}
}
- else if(itype[i]==SYSCALL||itype[i]==HLECALL)
+ else if(itype[i]==SYSCALL||itype[i]==HLECALL||itype[i]==INTCALL)
{
// SYSCALL instruction (software interrupt)
will_dirty_i=0;
case C2LS:
printf (" %x: %s cpr2[%d],r%d+%x\n",start+i*4,insn[i],(source[i]>>16)&0x1f,rs1[i],imm[i]);
break;
+ case INTCALL:
+ printf (" %x: %s (INTCALL)\n",start+i*4,insn[i]);
+ break;
default:
//printf (" %s %8x\n",insn[i],source[i]);
printf (" %x: %s\n",start+i*4,insn[i]);
break;
}
break;
+#ifndef FORCE32
case 0x14: strcpy(insn[i],"BEQL"); type=CJUMP; break;
case 0x15: strcpy(insn[i],"BNEL"); type=CJUMP; break;
case 0x16: strcpy(insn[i],"BLEZL"); type=CJUMP; break;
case 0x17: strcpy(insn[i],"BGTZL"); type=CJUMP; break;
-#ifndef FORCE32
case 0x18: strcpy(insn[i],"DADDI"); type=IMM16; break;
case 0x19: strcpy(insn[i],"DADDIU"); type=IMM16; break;
case 0x1A: strcpy(insn[i],"LDL"); type=LOADLR; break;
#endif
#ifdef PCSX
case 0x12: strcpy(insn[i],"COP2"); type=NI;
+ // note: COP MIPS-1 encoding differs from MIPS32
op2=(source[i]>>21)&0x1f;
- switch(op2)
+ if (source[i]&0x3f) {
+ if (gte_handlers[source[i]&0x3f]!=NULL) {
+ snprintf(insn[i], sizeof(insn[i]), "COP2 %x", source[i]&0x3f);
+ type=C2OP;
+ }
+ }
+ else switch(op2)
{
case 0x00: strcpy(insn[i],"MFC2"); type=COP2; break;
case 0x02: strcpy(insn[i],"CFC2"); type=COP2; break;
case 0x04: strcpy(insn[i],"MTC2"); type=COP2; break;
case 0x06: strcpy(insn[i],"CTC2"); type=COP2; break;
- default:
- if (gte_handlers[source[i]&0x3f]!=NULL) {
- snprintf(insn[i], sizeof(insn[i]), "COP2 %x", source[i]&0x3f);
- type=C2OP;
- }
- break;
}
break;
case 0x32: strcpy(insn[i],"LWC2"); type=C2LS; break;
printf("NI %08x @%08x (%08x)\n", source[i], addr + i*4, addr);
break;
}
+#ifdef PCSX
+ /* detect branch in delay slot early */
+ if(type==RJUMP||type==UJUMP||type==CJUMP||type==SJUMP||type==FJUMP) {
+ opcode[i+1]=source[i+1]>>26;
+ opcode2[i+1]=source[i+1]&0x3f;
+ if((0<opcode[i+1]&&opcode[i+1]<8)||(opcode[i+1]==0&&(opcode2[i+1]==8||opcode2[i+1]==9))) {
+ printf("branch in delay slot @%08x (%08x)\n", addr + i*4+4, addr);
+ // don't handle first branch and call interpreter if it's hit
+ type=INTCALL;
+ }
+ }
+#endif
itype[i]=type;
opcode2[i]=op2;
/* Get registers/immediates */
break;
case SYSCALL:
case HLECALL:
+ case INTCALL:
rs1[i]=CCREG;
rs2[i]=0;
rt1[i]=0;
else ba[i]=-1;
/* Is this the end of the block? */
if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000)) {
+#ifdef PCSX
+ // check for link register access in delay slot
+ int rt1_=rt1[i-1];
+ if(rt1_!=0&&(rs1[i]==rt1_||rs2[i]==rt1_||rt1[i]==rt1_||rt2[i]==rt1_)) {
+ printf("link access in delay slot @%08x (%08x)\n", addr + i*4, addr);
+ ba[i-1]=-1;
+ itype[i-1]=INTCALL;
+ done=2;
+ }
+ else
+#endif
if(rt1[i-1]==0) { // Continue past subroutine call (JAL)
- done=1;
- // Does the block continue due to a branch?
- for(j=i-1;j>=0;j--)
- {
- if(ba[j]==start+i*4+4) done=j=0;
- if(ba[j]==start+i*4+8) done=j=0;
- }
+ done=2;
}
else {
if(stop_after_jal) done=1;
if(i>MAXBLOCK/2) done=1;
}
if(itype[i]==SYSCALL&&stop_after_jal) done=1;
- if(itype[i]==HLECALL) done=1;
+ if(itype[i]==HLECALL||itype[i]==INTCALL) done=2;
+ if(done==2) {
+ // Does the block continue due to a branch?
+ for(j=i-1;j>=0;j--)
+ {
+ if(ba[j]==start+i*4+4) done=j=0;
+ if(ba[j]==start+i*4+8) done=j=0;
+ }
+ }
//assert(i<MAXBLOCK-1);
if(start+i*4==pagelimit-4) done=1;
assert(start+i*4<pagelimit);
if (rt1[i]==31) {
alloc_reg(¤t,i,31);
dirty_reg(¤t,31);
- //assert(rs1[i+1]!=31&&rs2[i+1]!=31);
+ assert(rs1[i+1]!=31&&rs2[i+1]!=31);
assert(rt1[i+1]!=rt1[i]);
#ifdef REG_PREFETCH
alloc_reg(¤t,i,PTEMP);
if (rt1[i]!=0) {
alloc_reg(¤t,i,rt1[i]);
dirty_reg(¤t,rt1[i]);
- //assert(rs1[i+1]!=31&&rs2[i+1]!=31);
+ assert(rs1[i+1]!=rt1[i]&&rs2[i+1]!=rt1[i]);
assert(rt1[i+1]!=rt1[i]);
#ifdef REG_PREFETCH
alloc_reg(¤t,i,PTEMP);
if (rt1[i]==31) { // BLTZAL/BGEZAL
alloc_reg(¤t,i,31);
dirty_reg(¤t,31);
- assert(rs1[i+1]!=31&&rs2[i+1]!=31);
//#ifdef REG_PREFETCH
//alloc_reg(¤t,i,PTEMP);
//#endif
break;
case SYSCALL:
case HLECALL:
+ case INTCALL:
syscall_alloc(¤t,i);
break;
case SPAN:
}
}
}
- else if(itype[i]==SYSCALL||itype[i]==HLECALL)
+ else if(itype[i]==SYSCALL||itype[i]==HLECALL||itype[i]==INTCALL)
{
// SYSCALL instruction (software interrupt)
nr=0;
f_regmap[hr]=regs[i].regmap[hr];
else f_regmap[hr]=-1;
}
- else if(regs[i].regmap[hr]>=0) f_regmap[hr]=regs[i].regmap[hr];
+ else if(regs[i].regmap[hr]>=0) {
+ if(f_regmap[hr]!=regs[i].regmap[hr]) {
+ // dealloc old register
+ int n;
+ for(n=0;n<HOST_REGS;n++)
+ {
+ if(f_regmap[n]==regs[i].regmap[hr]) {f_regmap[n]=-1;}
+ }
+ // and alloc new one
+ f_regmap[hr]=regs[i].regmap[hr];
+ }
+ }
if(branch_regs[i].regmap[hr]>64) {
if(!((branch_regs[i].dirty>>hr)&1))
f_regmap[hr]=branch_regs[i].regmap[hr];
else f_regmap[hr]=-1;
}
- else if(branch_regs[i].regmap[hr]>=0) f_regmap[hr]=branch_regs[i].regmap[hr];
- // make sure mapping hasn't changed
- int hr2;
- for(hr2=0;hr2<HOST_REGS;hr2++)
- if(hr2!=hr&&f_regmap[hr]==branch_regs[i].regmap[hr2]) {
- f_regmap[hr]=-1;
- break;
+ else if(branch_regs[i].regmap[hr]>=0) {
+ if(f_regmap[hr]!=branch_regs[i].regmap[hr]) {
+ // dealloc old register
+ int n;
+ for(n=0;n<HOST_REGS;n++)
+ {
+ if(f_regmap[n]==branch_regs[i].regmap[hr]) {f_regmap[n]=-1;}
+ }
+ // and alloc new one
+ f_regmap[hr]=branch_regs[i].regmap[hr];
}
+ }
if(itype[i+1]==STORE||itype[i+1]==STORELR||itype[i+1]==C1LS
||itype[i+1]==SHIFT||itype[i+1]==COP1||itype[i+1]==FLOAT
||itype[i+1]==FCOMP||itype[i+1]==FCONV
if(!((regs[i].dirty>>hr)&1))
f_regmap[hr]=regs[i].regmap[hr];
}
- else if(regs[i].regmap[hr]>=0) f_regmap[hr]=regs[i].regmap[hr];
+ else if(regs[i].regmap[hr]>=0) {
+ if(f_regmap[hr]!=regs[i].regmap[hr]) {
+ // dealloc old register
+ int n;
+ for(n=0;n<HOST_REGS;n++)
+ {
+ if(f_regmap[n]==regs[i].regmap[hr]) {f_regmap[n]=-1;}
+ }
+ // and alloc new one
+ f_regmap[hr]=regs[i].regmap[hr];
+ }
+ }
else if(regs[i].regmap[hr]<0) count++;
}
}
if((regs[i].was32>>dep2[i+1])&1) r32|=1LL<<dep2[i+1];
}
}
- else if(itype[i]==SYSCALL||itype[i]==HLECALL)
+ else if(itype[i]==SYSCALL||itype[i]==HLECALL||itype[i]==INTCALL)
{
// SYSCALL instruction (software interrupt)
r32=0;
syscall_assemble(i,®s[i]);break;
case HLECALL:
hlecall_assemble(i,®s[i]);break;
+ case INTCALL:
+ intcall_assemble(i,®s[i]);break;
case UJUMP:
ujump_assemble(i,®s[i]);ds=1;break;
case RJUMP: