#define MAXBLOCK 4096
#define MAX_OUTPUT_BLOCK_SIZE 262144
-int cycle_multiplier; // 100 for 1.0
-#define CLOCK_ADJUST(x) (((x) * cycle_multiplier + 50) / 100)
-
struct regstat
{
signed char regmap_entry[HOST_REGS];
uint64_t uu;
u_int wasconst;
u_int isconst;
+ u_int loadedconst; // host regs that have constants loaded
+ u_int waswritten; // MIPS regs that were used as store base before
uint64_t constmap[HOST_REGS];
};
static uint64_t gte_rs[MAXBLOCK]; // gte: 32 data and 32 ctl regs
static uint64_t gte_rt[MAXBLOCK];
static uint64_t gte_unneeded[MAXBLOCK];
- static int gte_reads_flags; // gte flag read encountered
static u_int smrv[32]; // speculated MIPS register values
static u_int smrv_strong; // mask or regs that are likely to have correct values
static u_int smrv_weak; // same, but somewhat less likely
static const u_int using_tlb=0;
#endif
int new_dynarec_did_compile;
+ int new_dynarec_hacks;
u_int stop_after_jal;
extern u_char restore_candidate[512];
extern int cycle_count;
//#define DEBUG_CYCLE_COUNT 1
+int cycle_multiplier; // 100 for 1.0
+
+static int CLOCK_ADJUST(int x) // scale cycle count x by cycle_multiplier/100 (100 == 1.0), rounding to nearest
+{
+ int s=(x>>31)|1; // rounding sign: -1 when x is negative, +1 otherwise -- NOTE(review): assumes 32-bit int with arithmetic right shift; confirm for all target compilers
+ return (x * cycle_multiplier + s * 50) / 100; // bias by half the divisor in the direction of x so the truncating division rounds half away from zero
+}
+
static void tlb_hacks()
{
#ifndef DISABLE_TLB
return page;
}
+#ifndef PCSX
static u_int get_vpage(u_int vaddr)
{
u_int vpage=(vaddr^0x80000000)>>12;
if(vpage>2048) vpage=2048+(vpage&2047);
return vpage;
}
+#else
+// no virtual mem in PCSX
+static u_int get_vpage(u_int vaddr) // PCSX variant of get_vpage: no separate virtual page numbering is computed here
+{
+ return get_page(vaddr); // the virtual page index is whatever get_page() yields for the same address
+}
+#endif
// Get address from virtual address
// This is called from the recompiled JR/JALR instructions
//static int rhits;
// this check is done by the caller
//if (inv_code_start<=addr&&addr<=inv_code_end) { rhits++; return; }
- u_int page=get_page(addr);
+ u_int page=get_vpage(addr);
if(page<2048) { // RAM
struct ll_entry *head;
u_int addr_min=~0, addr_max=0;
return;
}
else {
- inv_debug("INV ADDR: %08x miss, inv %08x-%08x, sk %d\n", addr, inv_code_start, inv_code_end, 0);//rhits);
- }
- //rhits=0;
- if(page!=0) // FIXME: don't know what's up with page 0 (Klonoa)
+ inv_debug("INV ADDR: %08x miss, inv %08x-%08x, sk %d\n", addr, inv_code_start, inv_code_end, 0);
return;
+ }
}
#endif
invalidate_block(addr>>12);
jaddr=0;
}
#endif
- if(!using_tlb) {
+ if(!using_tlb&&!(i_regs->waswritten&(1<<rs1[i]))&&!(new_dynarec_hacks&NDHACK_NO_SMC_CHECK)) {
if(!c||memtarget) {
#ifdef DESTRUCTIVE_SHIFT
// The x86 shift operation is 'destructive'; it overwrites the
}
if(!c||!memtarget)
add_stub(STORELR_STUB,jaddr,(int)out,i,(int)i_regs,temp,ccadj[i],reglist);
- if(!using_tlb) {
+ if(!using_tlb&&!(i_regs->waswritten&(1<<rs1[i]))&&!(new_dynarec_hacks&NDHACK_NO_SMC_CHECK)) {
#ifdef RAM_OFFSET
int map=get_reg(i_regs->regmap,ROREG);
if(map<0) map=HOST_TEMPREG;
emit_writedword_indexed_tlb(th,tl,0,offset||c||s<0?temp:s,map,temp);
type=STORED_STUB;
}
- if(!using_tlb) {
+ if(!using_tlb&&!(i_regs->waswritten&(1<<rs1[i]))&&!(new_dynarec_hacks&NDHACK_NO_SMC_CHECK)) {
if (opcode[i]==0x39||opcode[i]==0x3D) { // SWC1/SDC1
#ifndef DESTRUCTIVE_SHIFT
temp=offset||c||s<0?ar:s;
}
if(jaddr2)
add_stub(type,jaddr2,(int)out,i,ar,(int)i_regs,ccadj[i],reglist);
- if (opcode[i]==0x3a) { // SWC2
+ if(opcode[i]==0x3a) // SWC2
+ if(!(i_regs->waswritten&(1<<rs1[i]))&&!(new_dynarec_hacks&NDHACK_NO_SMC_CHECK)) {
#if defined(HOST_IMM8)
int ir=get_reg(i_regs->regmap,INVCP);
assert(ir>=0);
(using_tlb&&((signed int)constmap[i][rs]+offset)>=(signed int)0xC0000000))
#endif
emit_movimm(constmap[i][rs]+offset,ra);
+ regs[i].loadedconst|=1<<ra;
}
} // else did it in the previous cycle
} // else load_consts already did it
(using_tlb&&((signed int)constmap[i+1][rs]+offset)>=(signed int)0xC0000000))
#endif
emit_movimm(constmap[i+1][rs]+offset,ra);
+ regs[i+1].loadedconst|=1<<ra;
}
}
else if(rs1[i+1]==0) {
// Load registers with known constants
void load_consts(signed char pre[],signed char regmap[],int is32,int i)
{
- int hr;
+ int hr,hr2;
+ // propagate loaded constant flags
+ if(i==0||bt[i])
+ regs[i].loadedconst=0;
+ else {
+ for(hr=0;hr<HOST_REGS;hr++) {
+ if(hr!=EXCLUDE_REG&&regmap[hr]>=0&&((regs[i-1].isconst>>hr)&1)&&pre[hr]==regmap[hr]
+ &&regmap[hr]==regs[i-1].regmap[hr]&&((regs[i-1].loadedconst>>hr)&1))
+ {
+ regs[i].loadedconst|=1<<hr;
+ }
+ }
+ }
// Load 32-bit regs
for(hr=0;hr<HOST_REGS;hr++) {
if(hr!=EXCLUDE_REG&&regmap[hr]>=0) {
//if(entry[hr]!=regmap[hr]) {
- if(i==0||!((regs[i-1].isconst>>hr)&1)||pre[hr]!=regmap[hr]||bt[i]) {
+ if(!((regs[i].loadedconst>>hr)&1)) {
if(((regs[i].isconst>>hr)&1)&&regmap[hr]<64&&regmap[hr]>0) {
- int value;
+ int value,similar=0;
if(get_final_value(hr,i,&value)) {
- if(value==0) {
+ // see if some other register has similar value
+ for(hr2=0;hr2<HOST_REGS;hr2++) {
+ if(hr2!=EXCLUDE_REG&&((regs[i].loadedconst>>hr2)&1)) {
+ if(is_similar_value(value,constmap[i][hr2])) {
+ similar=1;
+ break;
+ }
+ }
+ }
+ if(similar) {
+ int value2;
+ if(get_final_value(hr2,i,&value2)) // is this needed?
+ emit_movimm_from(value2,hr2,value,hr);
+ else
+ emit_movimm(value,hr);
+ }
+ else if(value==0) {
emit_zeroreg(hr);
}
else {
emit_movimm(value,hr);
}
}
+ regs[i].loadedconst|=1<<hr;
}
}
}
{
int i;
uint64_t u,uu,gte_u,b,bu,gte_bu;
- uint64_t temp_u,temp_uu,temp_gte_u;
+ uint64_t temp_u,temp_uu,temp_gte_u=0;
uint64_t tdep;
+ uint64_t gte_u_unknown=0;
+ if(new_dynarec_hacks&NDHACK_GTE_UNNEEDED)
+ gte_u_unknown=~0ll;
if(iend==slen-1) {
u=1;uu=1;
+ gte_u=gte_u_unknown;
}else{
u=unneeded_reg[iend+1];
uu=unneeded_reg_upper[iend+1];
u=1;uu=1;
+ gte_u=gte_unneeded[iend+1];
}
- gte_u=temp_gte_u=0;
for (i=iend;i>=istart;i--)
{
// Branch out of this block, flush all regs
u=1;
uu=1;
- gte_u=0;
+ gte_u=gte_u_unknown;
/* Hexagon hack
if(itype[i]==UJUMP&&rt1[i]==31)
{
{
u=1;
uu=1;
- gte_u=0;
+ gte_u=gte_u_unknown;
}
}
}
{
temp_u=1;
temp_uu=1;
- temp_gte_u=0;
+ temp_gte_u=gte_u_unknown;
}
}
tdep=(~temp_uu>>rt1[i])&1;
}else{
unneeded_reg[(ba[i]-start)>>2]=1;
unneeded_reg_upper[(ba[i]-start)>>2]=1;
- gte_unneeded[(ba[i]-start)>>2]=0;
+ gte_unneeded[(ba[i]-start)>>2]=gte_u_unknown;
}
} /*else*/ if(1) {
if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000)
uu&=~(1LL<<us1[i]);
uu&=~(1LL<<us2[i]);
gte_u&=~gte_rs[i];
+ if(gte_rs[i]&&rt1[i]&&(unneeded_reg[i+1]&(1ll<<rt1[i])))
+ gte_u|=gte_rs[i]; // MFC2/CFC2 to dead register, unneeded
// Source-target dependencies
uu&=~(tdep<<dep1[i]);
uu&=~(tdep<<dep2[i]);
literalcount=0;
stop_after_jal=0;
inv_code_start=inv_code_end=~0;
- gte_reads_flags=0;
// TLB
#ifndef DISABLE_TLB
using_tlb=0;
{
case 0x00: gte_rs[i]=1ll<<gr; break; // MFC2
case 0x04: gte_rt[i]=1ll<<gr; break; // MTC2
- case 0x02: gte_rs[i]=1ll<<(gr+32); // CFC2
- if(gr==31&&!gte_reads_flags) {
- assem_debug("gte flag read encountered @%08x\n",addr + i*4);
- gte_reads_flags=1;
- }
- break;
+ case 0x02: gte_rs[i]=1ll<<(gr+32); break; // CFC2
case 0x06: gte_rt[i]=1ll<<(gr+32); break; // CTC2
}
break;
dirty_reg(&current,CCREG);
current.isconst=0;
current.wasconst=0;
+ current.waswritten=0;
int ds=0;
int cc=0;
int hr=-1;
if(current.regmap[hr]==0) current.regmap[hr]=-1;
}
current.isconst=0;
+ current.waswritten=0;
}
if(i>1)
{
regs[i].wasconst=current.isconst;
regs[i].was32=current.is32;
regs[i].wasdirty=current.dirty;
+ regs[i].loadedconst=0;
#if defined(DESTRUCTIVE_WRITEBACK) && !defined(FORCE32)
// To change a dirty register from 32 to 64 bits, we must write
// it out during the previous cycle (for branches, 2 cycles)
}
memcpy(regs[i].regmap,current.regmap,sizeof(current.regmap));
}
+
+ if(i>0&&(itype[i-1]==STORE||itype[i-1]==STORELR||(itype[i-1]==C2LS&&opcode[i-1]==0x3a))&&(u_int)imm[i-1]<0x800)
+ current.waswritten|=1<<rs1[i-1];
+ current.waswritten&=~(1<<rt1[i]);
+ current.waswritten&=~(1<<rt2[i]);
+ if((itype[i]==STORE||itype[i]==STORELR||(itype[i]==C2LS&&opcode[i]==0x3a))&&(u_int)imm[i]>=0x800)
+ current.waswritten&=~(1<<rs1[i]);
+
/* Branch post-alloc */
if(i>0)
{
{
cc=0;
}
-#ifdef PCSX
+#if defined(PCSX) && !defined(DRC_DBG)
else if(itype[i]==C2OP&&gte_cycletab[source[i]&0x3f]>2)
{
// GTE runs in parallel until accessed, divide by 2 for a rough guess
}
}
if(current.regmap[HOST_BTREG]==BTREG) current.regmap[HOST_BTREG]=-1;
+ regs[i].waswritten=current.waswritten;
}
/* Pass 4 - Cull unused host registers */