#include <stdlib.h>
#include <stdint.h> //include for uint64_t
#include <assert.h>
+#include <sys/mman.h>
#include "emu_if.h" //emulator interface
-#include <sys/mman.h>
+//#define DISASM
+//#define assem_debug printf
+//#define inv_debug printf
+#define assem_debug(...)
+#define inv_debug(...)
#ifdef __i386__
#include "assem_x86.h"
u_char dep1[MAXBLOCK];
u_char dep2[MAXBLOCK];
u_char lt1[MAXBLOCK];
+ static uint64_t gte_rs[MAXBLOCK]; // gte: 32 data and 32 ctl regs
+ static uint64_t gte_rt[MAXBLOCK];
+ static uint64_t gte_unneeded[MAXBLOCK];
+ static int gte_reads_flags; // gte flag read encountered
int imm[MAXBLOCK];
u_int ba[MAXBLOCK];
char likely[MAXBLOCK];
//#define DEBUG_CYCLE_COUNT 1
-void nullf() {}
-//#define assem_debug printf
-//#define inv_debug printf
-#define assem_debug nullf
-#define inv_debug nullf
-
static void tlb_hacks()
{
#ifndef DISABLE_TLB
}
}
-
+#ifndef FORCE32
void div64(int64_t dividend,int64_t divisor)
{
lo=dividend/divisor;
else original=loaded;
return original;
}
+#endif
#ifdef __i386__
#include "assem_x86.c"
}
type=STORED_STUB;
}
+#ifdef PCSX
+ if(jaddr) {
+ // PCSX store handlers don't check invcode again
+ reglist|=1<<addr;
+ add_stub(type,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
+ jaddr=0;
+ }
+#endif
if(!using_tlb) {
if(!c||memtarget) {
#ifdef DESTRUCTIVE_SHIFT
void unneeded_registers(int istart,int iend,int r)
{
int i;
- uint64_t u,uu,b,bu;
- uint64_t temp_u,temp_uu;
+ uint64_t u,uu,gte_u,b,bu,gte_bu;
+ uint64_t temp_u,temp_uu,temp_gte_u;
uint64_t tdep;
if(iend==slen-1) {
u=1;uu=1;
uu=unneeded_reg_upper[iend+1];
u=1;uu=1;
}
+ gte_u=temp_gte_u=0;
+
for (i=iend;i>=istart;i--)
{
//printf("unneeded registers i=%d (%d,%d) r=%d\n",i,istart,iend,r);
// Branch out of this block, flush all regs
u=1;
uu=1;
+ gte_u=0;
/* Hexagon hack
if(itype[i]==UJUMP&&rt1[i]==31)
{
uu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
uu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
u|=1;uu|=1;
+ gte_u|=gte_rt[i+1];
+ gte_u&=~gte_rs[i+1];
// If branch is "likely" (and conditional)
// then we skip the delay slot on the fall-thru path
if(likely[i]) {
if(i<slen-1) {
u&=unneeded_reg[i+2];
uu&=unneeded_reg_upper[i+2];
+ gte_u&=gte_unneeded[i+2];
}
else
{
u=1;
uu=1;
+ gte_u=0;
}
}
}
{
// Unconditional branch
temp_u=1;temp_uu=1;
+ temp_gte_u=0;
} else {
// Conditional branch (not taken case)
temp_u=unneeded_reg[i+2];
temp_uu=unneeded_reg_upper[i+2];
+ temp_gte_u&=gte_unneeded[i+2];
}
// Merge in delay slot
tdep=(~temp_uu>>rt1[i+1])&1;
temp_uu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
temp_uu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
temp_u|=1;temp_uu|=1;
+ temp_gte_u|=gte_rt[i+1];
+ temp_gte_u&=~gte_rs[i+1];
// If branch is "likely" (and conditional)
// then we skip the delay slot on the fall-thru path
if(likely[i]) {
if(i<slen-1) {
temp_u&=unneeded_reg[i+2];
temp_uu&=unneeded_reg_upper[i+2];
+ temp_gte_u&=gte_unneeded[i+2];
}
else
{
temp_u=1;
temp_uu=1;
+ temp_gte_u=0;
}
}
tdep=(~temp_uu>>rt1[i])&1;
temp_uu&=~((1LL<<us1[i])|(1LL<<us2[i]));
temp_uu&=~((tdep<<dep1[i])|(tdep<<dep2[i]));
temp_u|=1;temp_uu|=1;
+ temp_gte_u|=gte_rt[i];
+ temp_gte_u&=~gte_rs[i];
unneeded_reg[i]=temp_u;
unneeded_reg_upper[i]=temp_uu;
+ gte_unneeded[i]=temp_gte_u;
// Only go three levels deep. This recursion can take an
// excessive amount of time if there are a lot of nested loops.
if(r<2) {
}else{
unneeded_reg[(ba[i]-start)>>2]=1;
unneeded_reg_upper[(ba[i]-start)>>2]=1;
+ gte_unneeded[(ba[i]-start)>>2]=0;
}
} /*else*/ if(1) {
if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000)
// Unconditional branch
u=unneeded_reg[(ba[i]-start)>>2];
uu=unneeded_reg_upper[(ba[i]-start)>>2];
+ gte_u=gte_unneeded[(ba[i]-start)>>2];
branch_unneeded_reg[i]=u;
branch_unneeded_reg_upper[i]=uu;
//u=1;
uu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
uu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
u|=1;uu|=1;
+ gte_u|=gte_rt[i+1];
+ gte_u&=~gte_rs[i+1];
} else {
// Conditional branch
b=unneeded_reg[(ba[i]-start)>>2];
bu=unneeded_reg_upper[(ba[i]-start)>>2];
+ gte_bu=gte_unneeded[(ba[i]-start)>>2];
branch_unneeded_reg[i]=b;
branch_unneeded_reg_upper[i]=bu;
//b=1;
bu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
bu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
b|=1;bu|=1;
+ gte_bu|=gte_rt[i+1];
+ gte_bu&=~gte_rs[i+1];
// If branch is "likely" then we skip the
// delay slot on the fall-thru path
if(likely[i]) {
u=b;
uu=bu;
+ gte_u=gte_bu;
if(i<slen-1) {
u&=unneeded_reg[i+2];
uu&=unneeded_reg_upper[i+2];
+ gte_u&=gte_unneeded[i+2];
//u=1;
//uu=1;
}
} else {
u&=b;
uu&=bu;
+ gte_u&=gte_bu;
//u=1;
//uu=1;
}
u|=1LL<<rt2[i];
uu|=1LL<<rt1[i];
uu|=1LL<<rt2[i];
+ gte_u|=gte_rt[i];
// Accessed registers are needed
u&=~(1LL<<rs1[i]);
u&=~(1LL<<rs2[i]);
uu&=~(1LL<<us1[i]);
uu&=~(1LL<<us2[i]);
+ gte_u&=~gte_rs[i];
// Source-target dependencies
uu&=~(tdep<<dep1[i]);
uu&=~(tdep<<dep2[i]);
// Save it
unneeded_reg[i]=u;
unneeded_reg_upper[i]=uu;
+ gte_unneeded[i]=gte_u;
/*
printf("ur (%d,%d) %x: ",istart,iend,start+i*4);
printf("U:");
}
}
+#ifdef DISASM
/* disassembly */
void disassemble_inst(int i)
{
printf (" %x: %s\n",start+i*4,insn[i]);
}
}
+#else
+static void disassemble_inst(int i) {}
+#endif // DISASM
// clear the state completely, instead of just marking
// things invalid like invalidate_all_pages() does
literalcount=0;
stop_after_jal=0;
inv_code_start=inv_code_end=~0;
+ gte_reads_flags=0;
// TLB
#ifndef DISABLE_TLB
using_tlb=0;
#endif
#ifdef PCSX
case 0x12: strcpy(insn[i],"COP2"); type=NI;
- // note: COP MIPS-1 encoding differs from MIPS32
op2=(source[i]>>21)&0x1f;
- if (source[i]&0x3f) {
+ //if (op2 & 0x10) {
+ if (source[i]&0x3f) { // use this hack to support old savestates with patched gte insns
if (gte_handlers[source[i]&0x3f]!=NULL) {
- snprintf(insn[i], sizeof(insn[i]), "COP2 %x", source[i]&0x3f);
+ if (gte_regnames[source[i]&0x3f]!=NULL)
+ strcpy(insn[i],gte_regnames[source[i]&0x3f]);
+ else
+ snprintf(insn[i], sizeof(insn[i]), "COP2 %x", source[i]&0x3f);
type=C2OP;
}
}
us2[i]=0;
dep1[i]=0;
dep2[i]=0;
+ gte_rs[i]=gte_rt[i]=0;
switch(type) {
case LOAD:
rs1[i]=(source[i]>>21)&0x1f;
if(op2==16) if((source[i]&0x3f)==0x18) rs2[i]=CCREG; // ERET
break;
case COP1:
- case COP2:
rs1[i]=0;
rs2[i]=0;
rt1[i]=0;
if(op2==5) us1[i]=rs1[i]; // DMTC1
rs2[i]=CSREG;
break;
+ case COP2:
+ rs1[i]=0;
+ rs2[i]=0;
+ rt1[i]=0;
+ rt2[i]=0;
+ if(op2<3) rt1[i]=(source[i]>>16)&0x1F; // MFC2/CFC2
+ if(op2>3) rs1[i]=(source[i]>>16)&0x1F; // MTC2/CTC2
+ rs2[i]=CSREG;
+ int gr=(source[i]>>11)&0x1F;
+ switch(op2)
+ {
+ case 0x00: gte_rs[i]=1ll<<gr; break; // MFC2
+ case 0x04: gte_rt[i]=1ll<<gr; break; // MTC2
+ case 0x02: gte_rs[i]=1ll<<(gr+32); // CFC2
+ if(gr==31&&!gte_reads_flags) {
+ assem_debug("gte flag read encountered @%08x\n",addr + i*4);
+ gte_reads_flags=1;
+ }
+ break;
+ case 0x06: gte_rt[i]=1ll<<(gr+32); break; // CTC2
+ }
+ break;
case C1LS:
rs1[i]=(source[i]>>21)&0x1F;
rs2[i]=CSREG;
rt1[i]=0;
rt2[i]=0;
imm[i]=(short)source[i];
+ if(op==0x32) gte_rt[i]=1ll<<((source[i]>>16)&0x1F); // LWC2
+ else gte_rs[i]=1ll<<((source[i]>>16)&0x1F); // SWC2
+ break;
+ case C2OP:
+ rs1[i]=0;
+ rs2[i]=0;
+ rt1[i]=0;
+ rt2[i]=0;
+ gte_rt[i]=1ll<<63; // every op changes flags
+ // TODO: other regs?
break;
case FLOAT:
case FCONV:
if(itype[slen-1]==SPAN) {
bt[slen-1]=1; // Mark as a branch target so instruction can restart after exception
}
-
+
+#ifdef DISASM
/* Debug/disassembly */
- if((void*)assem_debug==(void*)printf)
for(i=0;i<slen;i++)
{
printf("U:");
#endif
}
}
+#endif // DISASM
/* Pass 8 - Assembly */
linkcount=0;stubcount=0;
for(i=0;i<slen;i++)
{
//if(ds) printf("ds: ");
- if((void*)assem_debug==(void*)printf) disassemble_inst(i);
+ disassemble_inst(i);
if(ds) {
ds=0; // Skip delay slot
if(bt[i]) assem_debug("OOPS - branch into delay slot\n");
}
inv_code_start=inv_code_end=~0;
#ifdef PCSX
- // PCSX maps all RAM mirror invalid_code tests to 0x80000000..0x80000000+RAM_SIZE
+ // for PCSX we need to mark all mirrors too
if(get_page(start)<(RAM_SIZE>>12))
for(i=start>>12;i<=(start+slen*4)>>12;i++)
- invalid_code[((u_int)0x80000000>>12)|i]=0;
+ invalid_code[((u_int)0x00000000>>12)|(i&0x1ff)]=
+ invalid_code[((u_int)0x80000000>>12)|(i&0x1ff)]=
+ invalid_code[((u_int)0xa0000000>>12)|(i&0x1ff)]=0;
#endif
/* Pass 10 - Free memory by expiring oldest blocks */