From a22ccd6a80307ef5f711332f68de96949cdeee76 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 3 Aug 2023 01:22:58 +0300 Subject: [PATCH] arm64: use ldp/stp more --- libpcsxcore/new_dynarec/assem_arm64.c | 59 +++++++++++++++++++++++++++ libpcsxcore/new_dynarec/new_dynarec.c | 58 +++++++++++--------------- 2 files changed, 82 insertions(+), 35 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index dc5bb4db..97e1fb14 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -1950,6 +1950,65 @@ static void multdiv_assemble_arm64(int i, const struct regstat *i_regs) } #define multdiv_assemble multdiv_assemble_arm64 +// wb_dirtys making use of stp when possible +static void wb_dirtys(const signed char i_regmap[], u_int i_dirty) +{ + signed char mregs[34+1]; + int r, hr; + memset(mregs, -1, sizeof(mregs)); + for (hr = 0; hr < HOST_REGS; hr++) { + r = i_regmap[hr]; + if (hr == EXCLUDE_REG || r <= 0 || r == CCREG) + continue; + if (!((i_dirty >> hr) & 1)) + continue; + assert(r < 34u); + mregs[r] = hr; + } + for (r = 1; r < 34; r++) { + if (mregs[r] < 0) + continue; + if (mregs[r+1] >= 0) { + uintptr_t offset = (u_char *)&psxRegs.GPR.r[r] - (u_char *)&dynarec_local; + emit_ldstp(1, 0, mregs[r], mregs[r+1], FP, offset); + r++; + } + else + emit_storereg(r, mregs[r]); + } +} +#define wb_dirtys wb_dirtys + +static void load_all_regs(const signed char i_regmap[]) +{ + signed char mregs[34+1]; + int r, hr; + memset(mregs, -1, sizeof(mregs)); + for (hr = 0; hr < HOST_REGS; hr++) { + r = i_regmap[hr]; + if (hr == EXCLUDE_REG || r < 0 || r == CCREG) + continue; + if ((u_int)r < 34u) + mregs[r] = hr; + else if (r < TEMPREG) + emit_loadreg(r, hr); + } + if (mregs[0] >= 0) + emit_zeroreg(mregs[0]); // we could use arm64's ZR instead of reg alloc + for (r = 1; r < 34; r++) { + if (mregs[r] < 0) + continue; + if (mregs[r+1] >= 0) { + uintptr_t offset = (u_char *)&psxRegs.GPR.r[r] - (u_char *)&dynarec_local; + emit_ldstp(0, 0, mregs[r], mregs[r+1], FP, offset); + r++; + } + else + emit_loadreg(r, mregs[r]); + } +} +#define load_all_regs load_all_regs + static void do_jump_vaddr(u_int rs) { if (rs != 0) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index db751266..dcf940d3 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -158,14 +158,14 @@ struct regstat { signed char regmap_entry[HOST_REGS]; signed char regmap[HOST_REGS]; - uint64_t wasdirty; - uint64_t dirty; - uint64_t u; + u_int wasdirty; + u_int dirty; u_int wasconst; // before; for example 'lw r2, (r2)' wasconst is true u_int isconst; // ... but isconst is false when r2 is known (hr) u_int loadedconst; // host regs that have constants loaded u_int noevict; // can't evict this hr (alloced by current op) //u_int waswritten; // MIPS regs that were used as store base before + uint64_t u; }; struct ht_entry @@ -409,9 +409,9 @@ static void invalidate_block(struct block_info *block); static void exception_assemble(int i, const struct regstat *i_regs, int ccadj_); // Needed by assembler -static void wb_register(signed char r, const signed char regmap[], uint64_t dirty); -static void wb_dirtys(const signed char i_regmap[], uint64_t i_dirty); -static void wb_needed_dirtys(const signed char i_regmap[], uint64_t i_dirty, int addr); +static void wb_register(signed char r, const signed char regmap[], u_int dirty); +static void wb_dirtys(const signed char i_regmap[], u_int i_dirty); +static void wb_needed_dirtys(const signed char i_regmap[], u_int i_dirty, int addr); static void load_all_regs(const signed char i_regmap[]); static void load_needed_regs(const signed char i_regmap[], const signed char next_regmap[]); static void load_regs_entry(int t); @@ -2288,7 +2288,7 @@ static void add_stub_r(enum stub_type type, void *addr, void *retaddr, } // Write out a single register -static void wb_register(signed char r, const signed char regmap[], uint64_t dirty) +static void wb_register(signed char r, const signed char regmap[], u_int dirty) { int hr; for(hr=0;hr>2; @@ -4738,6 +4741,7 @@ static void wb_needed_dirtys(const signed char i_regmap[], uint64_t i_dirty, int } // Load all registers (except cycle count) +#ifndef load_all_regs static void load_all_regs(const signed char i_regmap[]) { int hr; @@ -4754,48 +4758,31 @@ static void load_all_regs(const signed char i_regmap[]) } } } +#endif // Load all current registers also needed by next instruction static void load_needed_regs(const signed char i_regmap[], const signed char next_regmap[]) { + signed char regmap_sel[HOST_REGS]; int hr; - for(hr=0;hr=0) { - if(i_regmap[hr]==0) { - emit_zeroreg(hr); - } - else - if(i_regmap[hr]>0 && i_regmap[hr]= 0) + regmap_sel[hr] = i_regmap[hr]; } + load_all_regs(regmap_sel); } // Load all regs, storing cycle count if necessary static void load_regs_entry(int t) { - int hr; if(dops[t].is_ds) emit_addimm(HOST_CCREG,CLOCK_ADJUST(1),HOST_CCREG); else if(cinfo[t].ccadj) emit_addimm(HOST_CCREG,-cinfo[t].ccadj,HOST_CCREG); if(regs[t].regmap_entry[HOST_CCREG]!=CCREG) { emit_storereg(CCREG,HOST_CCREG); } - // Load 32-bit regs - for(hr=0;hr=0&®s[t].regmap_entry[hr]