From 40fca85b7b5d1a13d8df1a7674487409630d40fe Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 15 Nov 2021 21:09:47 +0200 Subject: [PATCH] drc: adjust constants, 32bit is enough --- libpcsxcore/new_dynarec/new_dynarec.c | 47 +++++++++++++------ libpcsxcore/new_dynarec/patches/trace_drc_chk | 37 ++++++++------- libpcsxcore/new_dynarec/patches/trace_intr | 28 +++++++---- 3 files changed, 72 insertions(+), 40 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index ceba0e74..a1d7f6a4 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -185,8 +185,10 @@ struct link_entry static uint64_t unneeded_reg[MAXBLOCK]; static uint64_t branch_unneeded_reg[MAXBLOCK]; static signed char regmap_pre[MAXBLOCK][HOST_REGS]; // pre-instruction i? - static uint64_t current_constmap[HOST_REGS]; - static uint64_t constmap[MAXBLOCK][HOST_REGS]; + // contains 'real' consts at [i] insn, but may differ from what's actually + // loaded in host reg as 'final' value is always loaded, see get_final_value() + static uint32_t current_constmap[HOST_REGS]; + static uint32_t constmap[MAXBLOCK][HOST_REGS]; static struct regstat regs[MAXBLOCK]; static struct regstat branch_regs[MAXBLOCK]; static signed char minimum_free_regs[MAXBLOCK]; @@ -592,7 +594,7 @@ void dirty_reg(struct regstat *cur,signed char reg) } } -void set_const(struct regstat *cur,signed char reg,uint64_t value) +static void set_const(struct regstat *cur, signed char reg, uint32_t value) { int hr; if(!reg) return; @@ -604,7 +606,7 @@ void set_const(struct regstat *cur,signed char reg,uint64_t value) } } -void clear_const(struct regstat *cur,signed char reg) +static void clear_const(struct regstat *cur, signed char reg) { int hr; if(!reg) return; @@ -615,7 +617,7 @@ void clear_const(struct regstat *cur,signed char reg) } } -int is_const(struct regstat *cur,signed char reg) +static int is_const(struct regstat *cur, signed char reg) { int hr; 
if(reg<0) return 0; @@ -627,7 +629,8 @@ int is_const(struct regstat *cur,signed char reg) } return 0; } -uint64_t get_const(struct regstat *cur,signed char reg) + +static uint32_t get_const(struct regstat *cur, signed char reg) { int hr; if(!reg) return 0; @@ -1717,7 +1720,7 @@ static void imm16_alloc(struct regstat *current,int i) else clear_const(current,rt1[i]); } else { - set_const(current,rt1[i],((long long)((short)imm[i]))<<16); // LUI + set_const(current,rt1[i],imm[i]<<16); // LUI } dirty_reg(current,rt1[i]); } @@ -4322,9 +4325,24 @@ static void drc_dbg_emit_do_cmp(int i) //extern int cycle; u_int hr,reglist=0; - for(hr=0;hr=0) reglist|=1< 0 && !bt[i]) { + for (hr = 0; hr < HOST_REGS; hr++) { + int reg = regs[i-1].regmap[hr]; + if (hr == EXCLUDE_REG || reg < 0) + continue; + if (!((regs[i-1].isconst >> hr) & 1)) + continue; + if (i > 1 && reg == regs[i-2].regmap[hr] && constmap[i-1][hr] == constmap[i-2][hr]) + continue; + emit_movimm(constmap[i-1][hr],0); + emit_storereg(reg, 0); + } + } emit_movimm(start+i*4,0); emit_writeword(0,&pcaddr); emit_far_call(do_insn_cmp); @@ -4333,6 +4351,7 @@ static void drc_dbg_emit_do_cmp(int i) //emit_writeword(0,&cycle); (void)get_reg2; restore_regs(reglist); + assem_debug("\\\\do_insn_cmp\n"); } #else #define drc_dbg_emit_do_cmp(x) @@ -7697,7 +7716,7 @@ int new_recompile_block(u_int addr) dirty_reg(&branch_regs[i-1],31); } memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap)); - memcpy(constmap[i],constmap[i-1],sizeof(current_constmap)); + memcpy(constmap[i],constmap[i-1],sizeof(constmap[i])); break; case RJUMP: memcpy(&branch_regs[i-1],&current,sizeof(current)); @@ -7718,7 +7737,7 @@ int new_recompile_block(u_int addr) } #endif memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap)); - memcpy(constmap[i],constmap[i-1],sizeof(current_constmap)); + memcpy(constmap[i],constmap[i-1],sizeof(constmap[i])); break; case CJUMP: if((opcode[i-1]&0x3E)==4) // BEQ/BNE @@ -7745,7 
+7764,7 @@ int new_recompile_block(u_int addr) branch_regs[i-1].isconst=0; branch_regs[i-1].wasconst=0; memcpy(&branch_regs[i-1].regmap_entry,&current.regmap,sizeof(current.regmap)); - memcpy(constmap[i],constmap[i-1],sizeof(current_constmap)); + memcpy(constmap[i],constmap[i-1],sizeof(constmap[i])); } else if((opcode[i-1]&0x3E)==6) // BLEZ/BGTZ @@ -7770,7 +7789,7 @@ int new_recompile_block(u_int addr) branch_regs[i-1].isconst=0; branch_regs[i-1].wasconst=0; memcpy(&branch_regs[i-1].regmap_entry,&current.regmap,sizeof(current.regmap)); - memcpy(constmap[i],constmap[i-1],sizeof(current_constmap)); + memcpy(constmap[i],constmap[i-1],sizeof(constmap[i])); } else // Alloc the delay slot in case the branch is taken @@ -7824,7 +7843,7 @@ int new_recompile_block(u_int addr) branch_regs[i-1].isconst=0; branch_regs[i-1].wasconst=0; memcpy(&branch_regs[i-1].regmap_entry,&current.regmap,sizeof(current.regmap)); - memcpy(constmap[i],constmap[i-1],sizeof(current_constmap)); + memcpy(constmap[i],constmap[i-1],sizeof(constmap[i])); } else // Alloc the delay slot in case the branch is taken @@ -7921,7 +7940,7 @@ int new_recompile_block(u_int addr) if(!is_ds[i]) { regs[i].dirty=current.dirty; regs[i].isconst=current.isconst; - memcpy(constmap[i],current_constmap,sizeof(current_constmap)); + memcpy(constmap[i],current_constmap,sizeof(constmap[i])); } for(hr=0;hr=0) { diff --git a/libpcsxcore/new_dynarec/patches/trace_drc_chk b/libpcsxcore/new_dynarec/patches/trace_drc_chk index e09af7ac..eca104d6 100644 --- a/libpcsxcore/new_dynarec/patches/trace_drc_chk +++ b/libpcsxcore/new_dynarec/patches/trace_drc_chk @@ -48,7 +48,7 @@ index bbc52c3..83c5b08 100644 ldr r0, [fp, #LO_next_interupt] diff --git a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S -index 698bd78..798abea 100644 +index 444545c..031cee2 100644 --- a/libpcsxcore/new_dynarec/linkage_arm64.S +++ b/libpcsxcore/new_dynarec/linkage_arm64.S @@ -119,7 +119,7 @@ FUNCTION(cc_interrupt): @@ -79,7 +79,7 @@ index 
698bd78..798abea 100644 .macro memhandler_post diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c -index 1452db3..8200e44 100644 +index a1d7f6a..3960f3b 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -44,10 +44,10 @@ static int sceBlock; @@ -96,7 +96,7 @@ index 1452db3..8200e44 100644 #define inv_debug(...) #ifdef __i386__ -@@ -423,6 +423,9 @@ static int doesnt_expire_soon(void *tcaddr) +@@ -489,6 +489,9 @@ static int doesnt_expire_soon(void *tcaddr) // This is called from the recompiled JR/JALR instructions void noinline *get_addr(u_int vaddr) { @@ -106,7 +106,7 @@ index 1452db3..8200e44 100644 u_int page=get_page(vaddr); u_int vpage=get_vpage(vaddr); struct ll_entry *head; -@@ -4393,13 +4396,15 @@ void do_cc(int i,signed char i_regmap[],int *adj,int addr,int taken,int invert) +@@ -4485,13 +4492,15 @@ void do_cc(int i,signed char i_regmap[],int *adj,int addr,int taken,int invert) } emit_addimm_and_set_flags(cycles,HOST_CCREG); jaddr=out; @@ -124,7 +124,7 @@ index 1452db3..8200e44 100644 } add_stub(CC_STUB,jaddr,idle?idle:out,(*adj==0||invert||idle)?0:(count+2),i,addr,taken,0); } -@@ -4807,7 +4812,8 @@ static void rjump_assemble(int i,struct regstat *i_regs) +@@ -4899,7 +4908,8 @@ static void rjump_assemble(int i,struct regstat *i_regs) // special case for RFE emit_jmp(0); else @@ -134,7 +134,7 @@ index 1452db3..8200e44 100644 //load_regs_bt(branch_regs[i].regmap,branch_regs[i].dirty,-1); #ifdef USE_MINI_HT if(rs1[i]==31) { -@@ -4912,7 +4918,8 @@ static void cjump_assemble(int i,struct regstat *i_regs) +@@ -5004,7 +5014,8 @@ static void cjump_assemble(int i,struct regstat *i_regs) else if(nop) { emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); void *jaddr=out; @@ -144,7 +144,7 @@ index 1452db3..8200e44 100644 add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0); } else { -@@ -5099,7 +5106,8 @@ static void cjump_assemble(int i,struct regstat *i_regs) +@@ -5191,7 
+5202,8 @@ static void cjump_assemble(int i,struct regstat *i_regs) emit_loadreg(CCREG,HOST_CCREG); emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); void *jaddr=out; @@ -154,7 +154,7 @@ index 1452db3..8200e44 100644 add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0); emit_storereg(CCREG,HOST_CCREG); } -@@ -5108,7 +5116,8 @@ static void cjump_assemble(int i,struct regstat *i_regs) +@@ -5200,7 +5212,8 @@ static void cjump_assemble(int i,struct regstat *i_regs) assert(cc==HOST_CCREG); emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); void *jaddr=out; @@ -164,7 +164,7 @@ index 1452db3..8200e44 100644 add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,likely[i]?NULLDS:NOTTAKEN,0); } } -@@ -5210,7 +5219,8 @@ static void sjump_assemble(int i,struct regstat *i_regs) +@@ -5302,7 +5315,8 @@ static void sjump_assemble(int i,struct regstat *i_regs) else if(nevertaken) { emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); void *jaddr=out; @@ -174,7 +174,7 @@ index 1452db3..8200e44 100644 add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0); } else { -@@ -5366,7 +5376,8 @@ static void sjump_assemble(int i,struct regstat *i_regs) +@@ -5458,7 +5472,8 @@ static void sjump_assemble(int i,struct regstat *i_regs) emit_loadreg(CCREG,HOST_CCREG); emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); void *jaddr=out; @@ -184,7 +184,7 @@ index 1452db3..8200e44 100644 add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0); emit_storereg(CCREG,HOST_CCREG); } -@@ -5375,7 +5386,8 @@ static void sjump_assemble(int i,struct regstat *i_regs) +@@ -5467,7 +5482,8 @@ static void sjump_assemble(int i,struct regstat *i_regs) assert(cc==HOST_CCREG); emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc); void *jaddr=out; @@ -194,7 +194,7 @@ index 1452db3..8200e44 100644 add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,likely[i]?NULLDS:NOTTAKEN,0); } } -@@ -5863,7 +5875,7 @@ void unneeded_registers(int istart,int iend,int r) +@@ -5955,7 +5971,7 @@ void unneeded_registers(int 
istart,int iend,int r) // R0 is always unneeded u|=1; // Save it @@ -203,7 +203,7 @@ index 1452db3..8200e44 100644 gte_unneeded[i]=gte_u; /* printf("ur (%d,%d) %x: ",istart,iend,start+i*4); -@@ -8387,6 +8399,7 @@ int new_recompile_block(int addr) +@@ -8474,6 +8491,7 @@ int new_recompile_block(u_int addr) // This allocates registers (if possible) one instruction prior // to use, which can avoid a load-use penalty on certain CPUs. @@ -211,7 +211,7 @@ index 1452db3..8200e44 100644 for(i=0;i>16)==0x1000) literal_pool(1024); else -@@ -8950,7 +8969,7 @@ int new_recompile_block(int addr) +@@ -9037,7 +9062,7 @@ int new_recompile_block(u_int addr) } } // External Branch Targets (jump_in) @@ -240,7 +241,7 @@ index 1452db3..8200e44 100644 for(i=0;i> 26; switch (tmp) { -@@ -547,13 +548,15 @@ static void doBranch(u32 tar) { +@@ -546,13 +547,15 @@ static void doBranch(u32 tar) { } break; } @@ -111,7 +123,7 @@ index 02e00a9..a007dc5 100644 } /********************************************************* -@@ -636,12 +639,13 @@ void psxMULTU() { +@@ -635,12 +638,13 @@ void psxMULTU() { psxRegs.GPR.n.hi = (u32)((res >> 32) & 0xffffffff); } @@ -127,7 +139,7 @@ index 02e00a9..a007dc5 100644 void psxBGEZ() { RepZBranchi32(>=) } // Branch if Rs >= 0 void psxBGEZAL() { RepZBranchLinki32(>=) } // Branch if Rs >= 0 and link -@@ -711,7 +715,7 @@ void psxRFE() { +@@ -710,7 +714,7 @@ void psxRFE() { * Register branch logic * * Format: OP rs, rt, offset * *********************************************************/ @@ -136,7 +148,7 @@ index 02e00a9..a007dc5 100644 void psxBEQ() { RepBranchi32(==) } // Branch if Rs == Rt void psxBNE() { RepBranchi32(!=) } // Branch if Rs != Rt -@@ -895,6 +899,9 @@ void MTC0(int reg, u32 val) { +@@ -894,6 +898,9 @@ void MTC0(int reg, u32 val) { case 12: // Status psxRegs.CP0.r[12] = val; psxTestSWInts(); @@ -146,7 +158,7 @@ index 02e00a9..a007dc5 100644 break; case 13: // Cause -@@ -1057,6 +1064,23 @@ void intExecuteBlock() { +@@ -1056,6 +1063,23 @@ void 
intExecuteBlock() { while (!branch2) execI(); } -- 2.39.5