#include <stdlib.h>
#include <stdint.h> //include for uint64_t
#include <assert.h>
+#include <sys/mman.h>
#include "emu_if.h" //emulator interface
-#include <sys/mman.h>
+//#define DISASM
+//#define assem_debug printf
+//#define inv_debug printf
+#define assem_debug(...)
+#define inv_debug(...)
#ifdef __i386__
#include "assem_x86.h"
u_char dep1[MAXBLOCK];
u_char dep2[MAXBLOCK];
u_char lt1[MAXBLOCK];
+ static uint64_t gte_rs[MAXBLOCK]; // gte: 32 data and 32 ctl regs
+ static uint64_t gte_rt[MAXBLOCK];
+ static uint64_t gte_unneeded[MAXBLOCK];
+ static int gte_reads_flags; // gte flag read encountered
+ static u_int smrv[32]; // speculated MIPS register values
+ static u_int smrv_strong; // mask of regs that are likely to have correct values
+ static u_int smrv_weak; // same, but somewhat less likely
+ static u_int smrv_strong_next; // same, but after current insn executes
+ static u_int smrv_weak_next;
int imm[MAXBLOCK];
u_int ba[MAXBLOCK];
char likely[MAXBLOCK];
#else
static const u_int using_tlb=0;
#endif
- static u_int sp_in_mirror;
+ int new_dynarec_did_compile;
u_int stop_after_jal;
extern u_char restore_candidate[512];
extern int cycle_count;
//#define DEBUG_CYCLE_COUNT 1
-void nullf() {}
-//#define assem_debug printf
-//#define inv_debug printf
-#define assem_debug nullf
-#define inv_debug nullf
-
static void tlb_hacks()
{
#ifndef DISABLE_TLB
}
}
-
+#ifndef FORCE32
void div64(int64_t dividend,int64_t divisor)
{
lo=dividend/divisor;
else original=loaded;
return original;
}
+#endif
#ifdef __i386__
#include "assem_x86.c"
void shiftimm_alloc(struct regstat *current,int i)
{
- clear_const(current,rs1[i]);
- clear_const(current,rt1[i]);
if(opcode2[i]<=0x3) // SLL/SRL/SRA
{
if(rt1[i]) {
alloc_reg(current,i,rt1[i]);
current->is32|=1LL<<rt1[i];
dirty_reg(current,rt1[i]);
+ if(is_const(current,rs1[i])) {
+ int v=get_const(current,rs1[i]);
+ if(opcode2[i]==0x00) set_const(current,rt1[i],v<<imm[i]);
+ if(opcode2[i]==0x02) set_const(current,rt1[i],(u_int)v>>imm[i]);
+ if(opcode2[i]==0x03) set_const(current,rt1[i],v>>imm[i]);
+ }
+ else clear_const(current,rt1[i]);
}
}
+ else
+ {
+ clear_const(current,rs1[i]);
+ clear_const(current,rt1[i]);
+ }
+
if(opcode2[i]>=0x38&&opcode2[i]<=0x3b) // DSLL/DSRL/DSRA
{
if(rt1[i]) {
t=get_reg(i_regs->regmap,rt1[i]);
s=get_reg(i_regs->regmap,rs1[i]);
//assert(t>=0);
- if(t>=0){
+ if(t>=0&&!((i_regs->isconst>>t)&1)){
if(rs1[i]==0)
{
emit_zeroreg(t);
if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE)
#endif
{
- #ifdef PCSX
- if(sp_in_mirror&&rs1[i]==29) {
- emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
- emit_cmpimm(HOST_TEMPREG,RAM_SIZE);
- fastload_reg_override=HOST_TEMPREG;
- }
- else
- #endif
- emit_cmpimm(addr,RAM_SIZE);
- jaddr=(int)out;
- #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
- // Hint to branch predictor that the branch is unlikely to be taken
- if(rs1[i]>=28)
- emit_jno_unlikely(0);
- else
- #endif
- emit_jno(0);
+ jaddr=emit_fastpath_cmp_jump(i,addr,&fastload_reg_override);
}
}
}else{ // using tlb
else addr=s;
if(!using_tlb) {
if(!c) {
- #ifdef PCSX
- if(sp_in_mirror&&rs1[i]==29) {
- emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
- emit_cmpimm(HOST_TEMPREG,RAM_SIZE);
- faststore_reg_override=HOST_TEMPREG;
- }
- else
- #endif
+ #ifndef PCSX
#ifdef R29_HACK
// Strmnnrmn's speed hack
if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE)
#endif
emit_jno(0);
}
+ #else
+ jaddr=emit_fastpath_cmp_jump(i,addr,&faststore_reg_override);
+ #endif
}
}else{ // using tlb
int x=0;
}
type=STORED_STUB;
}
+#ifdef PCSX
+ if(jaddr) {
+ // PCSX store handlers don't check invcode again
+ reglist|=1<<addr;
+ add_stub(type,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
+ jaddr=0;
+ }
+#endif
if(!using_tlb) {
if(!c||memtarget) {
#ifdef DESTRUCTIVE_SHIFT
int memtarget=0,c=0;
int jaddr2=0,jaddr3,type;
int agr=AGEN1+(i&1);
+ int fastio_reg_override=0;
u_int hr,reglist=0;
u_int copr=(source[i]>>16)&0x1f;
s=get_reg(i_regs->regmap,rs1[i]);
}
else {
if(!c) {
- emit_cmpimm(offset||c||s<0?ar:s,RAM_SIZE);
- jaddr2=(int)out;
- emit_jno(0);
+ jaddr2=emit_fastpath_cmp_jump(i,ar,&fastio_reg_override);
}
if (opcode[i]==0x32) { // LWC2
#ifdef HOST_IMM_ADDR32
if(c) emit_readword_tlb(constmap[i][s]+offset,-1,tl);
else
#endif
- emit_readword_indexed(0,ar,tl);
+ int a=ar;
+ if(fastio_reg_override) a=fastio_reg_override;
+ emit_readword_indexed(0,a,tl);
}
if (opcode[i]==0x3a) { // SWC2
#ifdef DESTRUCTIVE_SHIFT
if(!offset&&!c&&s>=0) emit_mov(s,ar);
#endif
- emit_writeword_indexed(tl,0,ar);
+ int a=ar;
+ if(fastio_reg_override) a=fastio_reg_override;
+ emit_writeword_indexed(tl,0,a);
}
}
if(jaddr2)
void ds_assemble(int i,struct regstat *i_regs)
{
+ speculate_register_values(i);
is_delayslot=1;
switch(itype[i]) {
case ALU:
void unneeded_registers(int istart,int iend,int r)
{
int i;
- uint64_t u,uu,b,bu;
- uint64_t temp_u,temp_uu;
+ uint64_t u,uu,gte_u,b,bu,gte_bu;
+ uint64_t temp_u,temp_uu,temp_gte_u;
uint64_t tdep;
if(iend==slen-1) {
u=1;uu=1;
uu=unneeded_reg_upper[iend+1];
u=1;uu=1;
}
+ gte_u=temp_gte_u=0;
+
for (i=iend;i>=istart;i--)
{
//printf("unneeded registers i=%d (%d,%d) r=%d\n",i,istart,iend,r);
// Branch out of this block, flush all regs
u=1;
uu=1;
+ gte_u=0;
/* Hexagon hack
if(itype[i]==UJUMP&&rt1[i]==31)
{
uu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
uu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
u|=1;uu|=1;
+ gte_u|=gte_rt[i+1];
+ gte_u&=~gte_rs[i+1];
// If branch is "likely" (and conditional)
// then we skip the delay slot on the fall-thru path
if(likely[i]) {
if(i<slen-1) {
u&=unneeded_reg[i+2];
uu&=unneeded_reg_upper[i+2];
+ gte_u&=gte_unneeded[i+2];
}
else
{
u=1;
uu=1;
+ gte_u=0;
}
}
}
{
// Unconditional branch
temp_u=1;temp_uu=1;
+ temp_gte_u=0;
} else {
// Conditional branch (not taken case)
temp_u=unneeded_reg[i+2];
temp_uu=unneeded_reg_upper[i+2];
+ temp_gte_u&=gte_unneeded[i+2];
}
// Merge in delay slot
tdep=(~temp_uu>>rt1[i+1])&1;
temp_uu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
temp_uu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
temp_u|=1;temp_uu|=1;
+ temp_gte_u|=gte_rt[i+1];
+ temp_gte_u&=~gte_rs[i+1];
// If branch is "likely" (and conditional)
// then we skip the delay slot on the fall-thru path
if(likely[i]) {
if(i<slen-1) {
temp_u&=unneeded_reg[i+2];
temp_uu&=unneeded_reg_upper[i+2];
+ temp_gte_u&=gte_unneeded[i+2];
}
else
{
temp_u=1;
temp_uu=1;
+ temp_gte_u=0;
}
}
tdep=(~temp_uu>>rt1[i])&1;
temp_uu&=~((1LL<<us1[i])|(1LL<<us2[i]));
temp_uu&=~((tdep<<dep1[i])|(tdep<<dep2[i]));
temp_u|=1;temp_uu|=1;
+ temp_gte_u|=gte_rt[i];
+ temp_gte_u&=~gte_rs[i];
unneeded_reg[i]=temp_u;
unneeded_reg_upper[i]=temp_uu;
+ gte_unneeded[i]=temp_gte_u;
// Only go three levels deep. This recursion can take an
// excessive amount of time if there are a lot of nested loops.
if(r<2) {
}else{
unneeded_reg[(ba[i]-start)>>2]=1;
unneeded_reg_upper[(ba[i]-start)>>2]=1;
+ gte_unneeded[(ba[i]-start)>>2]=0;
}
} /*else*/ if(1) {
if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000)
// Unconditional branch
u=unneeded_reg[(ba[i]-start)>>2];
uu=unneeded_reg_upper[(ba[i]-start)>>2];
+ gte_u=gte_unneeded[(ba[i]-start)>>2];
branch_unneeded_reg[i]=u;
branch_unneeded_reg_upper[i]=uu;
//u=1;
uu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
uu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
u|=1;uu|=1;
+ gte_u|=gte_rt[i+1];
+ gte_u&=~gte_rs[i+1];
} else {
// Conditional branch
b=unneeded_reg[(ba[i]-start)>>2];
bu=unneeded_reg_upper[(ba[i]-start)>>2];
+ gte_bu=gte_unneeded[(ba[i]-start)>>2];
branch_unneeded_reg[i]=b;
branch_unneeded_reg_upper[i]=bu;
//b=1;
bu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
bu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
b|=1;bu|=1;
+ gte_bu|=gte_rt[i+1];
+ gte_bu&=~gte_rs[i+1];
// If branch is "likely" then we skip the
// delay slot on the fall-thru path
if(likely[i]) {
u=b;
uu=bu;
+ gte_u=gte_bu;
if(i<slen-1) {
u&=unneeded_reg[i+2];
uu&=unneeded_reg_upper[i+2];
+ gte_u&=gte_unneeded[i+2];
//u=1;
//uu=1;
}
} else {
u&=b;
uu&=bu;
+ gte_u&=gte_bu;
//u=1;
//uu=1;
}
u|=1LL<<rt2[i];
uu|=1LL<<rt1[i];
uu|=1LL<<rt2[i];
+ gte_u|=gte_rt[i];
// Accessed registers are needed
u&=~(1LL<<rs1[i]);
u&=~(1LL<<rs2[i]);
uu&=~(1LL<<us1[i]);
uu&=~(1LL<<us2[i]);
+ gte_u&=~gte_rs[i];
// Source-target dependencies
uu&=~(tdep<<dep1[i]);
uu&=~(tdep<<dep2[i]);
// Save it
unneeded_reg[i]=u;
unneeded_reg_upper[i]=uu;
+ gte_unneeded[i]=gte_u;
/*
printf("ur (%d,%d) %x: ",istart,iend,start+i*4);
printf("U:");
}
}
+#ifdef DISASM
/* disassembly */
void disassemble_inst(int i)
{
printf (" %x: %s\n",start+i*4,insn[i]);
}
}
+#else
+static void disassemble_inst(int i) {} // empty stub for builds without DISASM, so call sites need no #ifdef
+#endif // DISASM
// clear the state completely, instead of just marking
// things invalid like invalidate_all_pages() does
literalcount=0;
stop_after_jal=0;
inv_code_start=inv_code_end=~0;
+ gte_reads_flags=0;
// TLB
#ifndef DISABLE_TLB
using_tlb=0;
#endif
- sp_in_mirror=0;
for(n=0;n<524288;n++) // 0 .. 0x7FFFFFFF
memory_map[n]=-1;
for(n=524288;n<526336;n++) // 0x80000000 .. 0x807FFFFF
//rlist();
start = (u_int)addr&~3;
//assert(((u_int)addr&1)==0);
+ new_dynarec_did_compile=1;
#ifdef PCSX
- if(!sp_in_mirror&&(signed int)(psxRegs.GPR.n.sp&0xffe00000)>0x80200000&&
- 0x10000<=psxRegs.GPR.n.sp&&(psxRegs.GPR.n.sp&~0xe0e00000)<RAM_SIZE) {
- printf("SP hack enabled (%08x), @%08x\n", psxRegs.GPR.n.sp, psxRegs.pc);
- sp_in_mirror=1;
- }
if (Config.HLE && start == 0x80001000) // hlecall
{
// XXX: is this enough? Maybe check hleSoftCall?
emit_movimm(start,0);
emit_writeword(0,(int)&pcaddr);
emit_jmp((int)new_dyna_leave);
+ literal_pool(0);
#ifdef __arm__
__clear_cache((void *)beginning,out);
#endif
#endif
#ifdef PCSX
case 0x12: strcpy(insn[i],"COP2"); type=NI;
- // note: COP MIPS-1 encoding differs from MIPS32
op2=(source[i]>>21)&0x1f;
- if (source[i]&0x3f) {
+ //if (op2 & 0x10) {
+ if (source[i]&0x3f) { // use this hack to support old savestates with patched gte insns
if (gte_handlers[source[i]&0x3f]!=NULL) {
- snprintf(insn[i], sizeof(insn[i]), "COP2 %x", source[i]&0x3f);
+ if (gte_regnames[source[i]&0x3f]!=NULL)
+ strcpy(insn[i],gte_regnames[source[i]&0x3f]);
+ else
+ snprintf(insn[i], sizeof(insn[i]), "COP2 %x", source[i]&0x3f);
type=C2OP;
}
}
us2[i]=0;
dep1[i]=0;
dep2[i]=0;
+ gte_rs[i]=gte_rt[i]=0;
switch(type) {
case LOAD:
rs1[i]=(source[i]>>21)&0x1f;
if(op2==16) if((source[i]&0x3f)==0x18) rs2[i]=CCREG; // ERET
break;
case COP1:
- case COP2:
rs1[i]=0;
rs2[i]=0;
rt1[i]=0;
if(op2==5) us1[i]=rs1[i]; // DMTC1
rs2[i]=CSREG;
break;
+ case COP2:
+ rs1[i]=0;
+ rs2[i]=0;
+ rt1[i]=0;
+ rt2[i]=0;
+ if(op2<3) rt1[i]=(source[i]>>16)&0x1F; // MFC2/CFC2
+ if(op2>3) rs1[i]=(source[i]>>16)&0x1F; // MTC2/CTC2
+ rs2[i]=CSREG;
+ int gr=(source[i]>>11)&0x1F;
+ switch(op2)
+ {
+ case 0x00: gte_rs[i]=1ll<<gr; break; // MFC2
+ case 0x04: gte_rt[i]=1ll<<gr; break; // MTC2
+ case 0x02: gte_rs[i]=1ll<<(gr+32); // CFC2
+ if(gr==31&&!gte_reads_flags) {
+ assem_debug("gte flag read encountered @%08x\n",addr + i*4);
+ gte_reads_flags=1;
+ }
+ break;
+ case 0x06: gte_rt[i]=1ll<<(gr+32); break; // CTC2
+ }
+ break;
case C1LS:
rs1[i]=(source[i]>>21)&0x1F;
rs2[i]=CSREG;
rt1[i]=0;
rt2[i]=0;
imm[i]=(short)source[i];
+ if(op==0x32) gte_rt[i]=1ll<<((source[i]>>16)&0x1F); // LWC2
+ else gte_rs[i]=1ll<<((source[i]>>16)&0x1F); // SWC2
+ break;
+ case C2OP:
+ rs1[i]=0;
+ rs2[i]=0;
+ rt1[i]=0;
+ rt2[i]=0;
+ gte_rt[i]=1ll<<63; // every op changes flags
+ // TODO: other regs?
break;
case FLOAT:
case FCONV:
if(itype[slen-1]==SPAN) {
bt[slen-1]=1; // Mark as a branch target so instruction can restart after exception
}
-
+
+#ifdef DISASM
/* Debug/disassembly */
- if((void*)assem_debug==(void*)printf)
for(i=0;i<slen;i++)
{
printf("U:");
#endif
}
}
+#endif // DISASM
/* Pass 8 - Assembly */
linkcount=0;stubcount=0;
for(i=0;i<slen;i++)
{
//if(ds) printf("ds: ");
- if((void*)assem_debug==(void*)printf) disassemble_inst(i);
+ disassemble_inst(i);
if(ds) {
ds=0; // Skip delay slot
if(bt[i]) assem_debug("OOPS - branch into delay slot\n");
instr_addr[i]=0;
} else {
+ speculate_register_values(i);
#ifndef DESTRUCTIVE_WRITEBACK
if(i<2||(itype[i-2]!=UJUMP&&itype[i-2]!=RJUMP&&(source[i-2]>>16)!=0x1000))
{
}
inv_code_start=inv_code_end=~0;
#ifdef PCSX
- // PCSX maps all RAM mirror invalid_code tests to 0x80000000..0x80000000+RAM_SIZE
+ // for PCSX we need to mark all mirrors too
if(get_page(start)<(RAM_SIZE>>12))
for(i=start>>12;i<=(start+slen*4)>>12;i++)
- invalid_code[((u_int)0x80000000>>12)|i]=0;
+ invalid_code[((u_int)0x00000000>>12)|(i&0x1ff)]=
+ invalid_code[((u_int)0x80000000>>12)|(i&0x1ff)]=
+ invalid_code[((u_int)0xa0000000>>12)|(i&0x1ff)]=0;
#endif
/* Pass 10 - Free memory by expiring oldest blocks */