static uint64_t unneeded_reg[MAXBLOCK];
static uint64_t branch_unneeded_reg[MAXBLOCK];
static signed char regmap_pre[MAXBLOCK][HOST_REGS]; // pre-instruction i?
- static uint64_t current_constmap[HOST_REGS];
- static uint64_t constmap[MAXBLOCK][HOST_REGS];
+ // Holds the 'real' constant values as of insn [i]. These may differ from what is
+ // actually loaded in the host reg, since the 'final' value is always loaded; see get_final_value().
+ static uint32_t current_constmap[HOST_REGS];
+ static uint32_t constmap[MAXBLOCK][HOST_REGS];
static struct regstat regs[MAXBLOCK];
static struct regstat branch_regs[MAXBLOCK];
static signed char minimum_free_regs[MAXBLOCK];
}
}
-void set_const(struct regstat *cur,signed char reg,uint64_t value)
+static void set_const(struct regstat *cur, signed char reg, uint32_t value)
{
int hr;
if(!reg) return;
}
}
-void clear_const(struct regstat *cur,signed char reg)
+static void clear_const(struct regstat *cur, signed char reg)
{
int hr;
if(!reg) return;
}
}
-int is_const(struct regstat *cur,signed char reg)
+static int is_const(struct regstat *cur, signed char reg)
{
int hr;
if(reg<0) return 0;
}
return 0;
}
-uint64_t get_const(struct regstat *cur,signed char reg)
+
+static uint32_t get_const(struct regstat *cur, signed char reg)
{
int hr;
if(!reg) return 0;
else clear_const(current,rt1[i]);
}
else {
- set_const(current,rt1[i],((long long)((short)imm[i]))<<16); // LUI
+ set_const(current,rt1[i],imm[i]<<16); // LUI
}
dirty_reg(current,rt1[i]);
}
//extern int cycle;
u_int hr,reglist=0;
- for(hr=0;hr<HOST_REGS;hr++)
+ assem_debug("//do_insn_cmp %08x\n", start+i*4);
+ for (hr = 0; hr < HOST_REGS; hr++)
if(regs[i].regmap[hr]>=0) reglist|=1<<hr;
save_regs(reglist);
+ // write out changed consts to match the interpreter
+ if (i > 0 && !bt[i]) {
+ for (hr = 0; hr < HOST_REGS; hr++) {
+ int reg = regs[i-1].regmap[hr];
+ if (hr == EXCLUDE_REG || reg < 0)
+ continue;
+ if (!((regs[i-1].isconst >> hr) & 1))
+ continue;
+ if (i > 1 && reg == regs[i-2].regmap[hr] && constmap[i-1][hr] == constmap[i-2][hr])
+ continue;
+ emit_movimm(constmap[i-1][hr],0);
+ emit_storereg(reg, 0);
+ }
+ }
emit_movimm(start+i*4,0);
emit_writeword(0,&pcaddr);
emit_far_call(do_insn_cmp);
//emit_writeword(0,&cycle);
(void)get_reg2;
restore_regs(reglist);
+ assem_debug("\\\\do_insn_cmp\n");
}
#else
#define drc_dbg_emit_do_cmp(x)
dirty_reg(&branch_regs[i-1],31);
}
memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap));
- memcpy(constmap[i],constmap[i-1],sizeof(current_constmap));
+ memcpy(constmap[i],constmap[i-1],sizeof(constmap[i]));
break;
case RJUMP:
memcpy(&branch_regs[i-1],¤t,sizeof(current));
}
#endif
memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap));
- memcpy(constmap[i],constmap[i-1],sizeof(current_constmap));
+ memcpy(constmap[i],constmap[i-1],sizeof(constmap[i]));
break;
case CJUMP:
if((opcode[i-1]&0x3E)==4) // BEQ/BNE
branch_regs[i-1].isconst=0;
branch_regs[i-1].wasconst=0;
memcpy(&branch_regs[i-1].regmap_entry,¤t.regmap,sizeof(current.regmap));
- memcpy(constmap[i],constmap[i-1],sizeof(current_constmap));
+ memcpy(constmap[i],constmap[i-1],sizeof(constmap[i]));
}
else
if((opcode[i-1]&0x3E)==6) // BLEZ/BGTZ
branch_regs[i-1].isconst=0;
branch_regs[i-1].wasconst=0;
memcpy(&branch_regs[i-1].regmap_entry,¤t.regmap,sizeof(current.regmap));
- memcpy(constmap[i],constmap[i-1],sizeof(current_constmap));
+ memcpy(constmap[i],constmap[i-1],sizeof(constmap[i]));
}
else
// Alloc the delay slot in case the branch is taken
branch_regs[i-1].isconst=0;
branch_regs[i-1].wasconst=0;
memcpy(&branch_regs[i-1].regmap_entry,¤t.regmap,sizeof(current.regmap));
- memcpy(constmap[i],constmap[i-1],sizeof(current_constmap));
+ memcpy(constmap[i],constmap[i-1],sizeof(constmap[i]));
}
else
// Alloc the delay slot in case the branch is taken
if(!is_ds[i]) {
regs[i].dirty=current.dirty;
regs[i].isconst=current.isconst;
- memcpy(constmap[i],current_constmap,sizeof(current_constmap));
+ memcpy(constmap[i],current_constmap,sizeof(constmap[i]));
}
for(hr=0;hr<HOST_REGS;hr++) {
if(hr!=EXCLUDE_REG&®s[i].regmap[hr]>=0) {
ldr r0, [fp, #LO_next_interupt]
diff --git a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S
-index 698bd78..798abea 100644
+index 444545c..031cee2 100644
--- a/libpcsxcore/new_dynarec/linkage_arm64.S
+++ b/libpcsxcore/new_dynarec/linkage_arm64.S
@@ -119,7 +119,7 @@ FUNCTION(cc_interrupt):
.macro memhandler_post
diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c
-index 1452db3..8200e44 100644
+index a1d7f6a..3960f3b 100644
--- a/libpcsxcore/new_dynarec/new_dynarec.c
+++ b/libpcsxcore/new_dynarec/new_dynarec.c
@@ -44,10 +44,10 @@ static int sceBlock;
#define inv_debug(...)
#ifdef __i386__
-@@ -423,6 +423,9 @@ static int doesnt_expire_soon(void *tcaddr)
+@@ -489,6 +489,9 @@ static int doesnt_expire_soon(void *tcaddr)
// This is called from the recompiled JR/JALR instructions
void noinline *get_addr(u_int vaddr)
{
u_int page=get_page(vaddr);
u_int vpage=get_vpage(vaddr);
struct ll_entry *head;
-@@ -4393,13 +4396,15 @@ void do_cc(int i,signed char i_regmap[],int *adj,int addr,int taken,int invert)
+@@ -4485,13 +4492,15 @@ void do_cc(int i,signed char i_regmap[],int *adj,int addr,int taken,int invert)
}
emit_addimm_and_set_flags(cycles,HOST_CCREG);
jaddr=out;
}
add_stub(CC_STUB,jaddr,idle?idle:out,(*adj==0||invert||idle)?0:(count+2),i,addr,taken,0);
}
-@@ -4807,7 +4812,8 @@ static void rjump_assemble(int i,struct regstat *i_regs)
+@@ -4899,7 +4908,8 @@ static void rjump_assemble(int i,struct regstat *i_regs)
// special case for RFE
emit_jmp(0);
else
//load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,-1);
#ifdef USE_MINI_HT
if(rs1[i]==31) {
-@@ -4912,7 +4918,8 @@ static void cjump_assemble(int i,struct regstat *i_regs)
+@@ -5004,7 +5014,8 @@ static void cjump_assemble(int i,struct regstat *i_regs)
else if(nop) {
emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc);
void *jaddr=out;
add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0);
}
else {
-@@ -5099,7 +5106,8 @@ static void cjump_assemble(int i,struct regstat *i_regs)
+@@ -5191,7 +5202,8 @@ static void cjump_assemble(int i,struct regstat *i_regs)
emit_loadreg(CCREG,HOST_CCREG);
emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG);
void *jaddr=out;
add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0);
emit_storereg(CCREG,HOST_CCREG);
}
-@@ -5108,7 +5116,8 @@ static void cjump_assemble(int i,struct regstat *i_regs)
+@@ -5200,7 +5212,8 @@ static void cjump_assemble(int i,struct regstat *i_regs)
assert(cc==HOST_CCREG);
emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc);
void *jaddr=out;
add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,likely[i]?NULLDS:NOTTAKEN,0);
}
}
-@@ -5210,7 +5219,8 @@ static void sjump_assemble(int i,struct regstat *i_regs)
+@@ -5302,7 +5315,8 @@ static void sjump_assemble(int i,struct regstat *i_regs)
else if(nevertaken) {
emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc);
void *jaddr=out;
add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0);
}
else {
-@@ -5366,7 +5376,8 @@ static void sjump_assemble(int i,struct regstat *i_regs)
+@@ -5458,7 +5472,8 @@ static void sjump_assemble(int i,struct regstat *i_regs)
emit_loadreg(CCREG,HOST_CCREG);
emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG);
void *jaddr=out;
add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0);
emit_storereg(CCREG,HOST_CCREG);
}
-@@ -5375,7 +5386,8 @@ static void sjump_assemble(int i,struct regstat *i_regs)
+@@ -5467,7 +5482,8 @@ static void sjump_assemble(int i,struct regstat *i_regs)
assert(cc==HOST_CCREG);
emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc);
void *jaddr=out;
add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,likely[i]?NULLDS:NOTTAKEN,0);
}
}
-@@ -5863,7 +5875,7 @@ void unneeded_registers(int istart,int iend,int r)
+@@ -5955,7 +5971,7 @@ void unneeded_registers(int istart,int iend,int r)
// R0 is always unneeded
u|=1;
// Save it
gte_unneeded[i]=gte_u;
/*
printf("ur (%d,%d) %x: ",istart,iend,start+i*4);
-@@ -8387,6 +8399,7 @@ int new_recompile_block(int addr)
+@@ -8474,6 +8491,7 @@ int new_recompile_block(u_int addr)
// This allocates registers (if possible) one instruction prior
// to use, which can avoid a load-use penalty on certain CPUs.
for(i=0;i<slen-1;i++)
{
if(!i||(itype[i-1]!=UJUMP&&itype[i-1]!=CJUMP&&itype[i-1]!=SJUMP&&itype[i-1]!=RJUMP))
-@@ -8543,6 +8556,7 @@ int new_recompile_block(int addr)
+@@ -8630,6 +8648,7 @@ int new_recompile_block(u_int addr)
}
}
}
/* Pass 6 - Optimize clean/dirty state */
clean_registers(0,slen-1,1);
-@@ -8842,6 +8856,11 @@ int new_recompile_block(int addr)
+@@ -8929,6 +8948,12 @@ int new_recompile_block(u_int addr)
case SPAN:
pagespan_assemble(i,®s[i]);break;
}
+
+#ifdef DRC_DBG
++ // write-out non-consts, consts are likely different because of get_final_value()
+ if(itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP&&itype[i]!=RJUMP)
-+ wb_dirtys(regs[i].regmap,regs[i].dirty);
++ wb_dirtys(regs[i].regmap,regs[i].dirty&~regs[i].loadedconst);
+#endif
if(itype[i]==UJUMP||itype[i]==RJUMP||(source[i]>>16)==0x1000)
literal_pool(1024);
else
-@@ -8950,7 +8969,7 @@ int new_recompile_block(int addr)
+@@ -9037,7 +9062,7 @@ int new_recompile_block(u_int addr)
}
}
// External Branch Targets (jump_in)
for(i=0;i<slen;i++)
{
if(bt[i]||i==0)
-@@ -9065,6 +9084,10 @@ int new_recompile_block(int addr)
+@@ -9150,6 +9175,10 @@ int new_recompile_block(u_int addr)
}
expirep=(expirep+1)&65535;
}
+diff --git a/Makefile b/Makefile
+index 0db94f7..e4fe560 100644
+--- a/Makefile
++++ b/Makefile
+@@ -26,6 +26,7 @@ endif
+ else # NO_CONFIG_MAK
+ config.mak:
+ endif
++CFLAGS += -UICACHE_EMULATION
+
+ -include Makefile.local
+
diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c
-index 2c82f58..8572981 100644
+index 2df259b..2a15e6e 100644
--- a/libpcsxcore/new_dynarec/emu_if.c
+++ b/libpcsxcore/new_dynarec/emu_if.c
@@ -417,13 +417,17 @@ static void ari64_shutdown()
#ifdef PSXHW_LOG
PSXHW_LOG("*Unknown 32bit write at address %x value %x\n", add, value);
diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c
-index 02e00a9..a007dc5 100644
+index 61c60ed..0fa5283 100644
--- a/libpcsxcore/psxinterpreter.c
+++ b/libpcsxcore/psxinterpreter.c
-@@ -512,8 +512,9 @@ static void doBranch(u32 tar) {
+@@ -511,8 +511,9 @@ static void doBranch(u32 tar) {
debugI();
psxRegs.pc += 4;
// check for load delay
tmp = psxRegs.code >> 26;
switch (tmp) {
-@@ -547,13 +548,15 @@ static void doBranch(u32 tar) {
+@@ -546,13 +547,15 @@ static void doBranch(u32 tar) {
}
break;
}
}
/*********************************************************
-@@ -636,12 +639,13 @@ void psxMULTU() {
+@@ -635,12 +638,13 @@ void psxMULTU() {
psxRegs.GPR.n.hi = (u32)((res >> 32) & 0xffffffff);
}
void psxBGEZ() { RepZBranchi32(>=) } // Branch if Rs >= 0
void psxBGEZAL() { RepZBranchLinki32(>=) } // Branch if Rs >= 0 and link
-@@ -711,7 +715,7 @@ void psxRFE() {
+@@ -710,7 +714,7 @@ void psxRFE() {
* Register branch logic *
* Format: OP rs, rt, offset *
*********************************************************/
void psxBEQ() { RepBranchi32(==) } // Branch if Rs == Rt
void psxBNE() { RepBranchi32(!=) } // Branch if Rs != Rt
-@@ -895,6 +899,9 @@ void MTC0(int reg, u32 val) {
+@@ -894,6 +898,9 @@ void MTC0(int reg, u32 val) {
case 12: // Status
psxRegs.CP0.r[12] = val;
psxTestSWInts();
break;
case 13: // Cause
-@@ -1057,6 +1064,23 @@ void intExecuteBlock() {
+@@ -1056,6 +1063,23 @@ void intExecuteBlock() {
while (!branch2) execI();
}