X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=libpcsxcore%2Fnew_dynarec%2Fassem_arm64.c;h=bad2854cfd39b1f6f0039849ffead9218b2b79a3;hb=0c2126b9446fea1eb2a4e4c84fcb5ac1f364e81c;hp=670f3799e1425c1bd6dababed928b0132c5361de;hpb=277718fa66c96f64360b2c97a5dfa3ef3e6f1711;p=pcsx_rearmed.git diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index 670f3799..bad2854c 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -101,9 +101,17 @@ static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr) } // Alloc cycle count into dedicated register -static void alloc_cc(struct regstat *cur,int i) +static void alloc_cc(struct regstat *cur, int i) { - alloc_arm_reg(cur,i,CCREG,HOST_CCREG); + alloc_arm_reg(cur, i, CCREG, HOST_CCREG); +} + +static void alloc_cc_optional(struct regstat *cur, int i) +{ + if (cur->regmap[HOST_CCREG] < 0) { + alloc_arm_reg(cur, i, CCREG, HOST_CCREG); + cur->noevict &= ~(1u << HOST_CCREG); + } } /* Special alloc */ @@ -484,7 +492,6 @@ static void emit_loadreg(u_int r, u_int hr) //case HIREG: addr = &hi; break; //case LOREG: addr = &lo; break; case CCREG: addr = &cycle_count; break; - case CSREG: addr = &psxRegs.CP0.n.SR; break; case INVCP: addr = &invc_ptr; is64 = 1; break; case ROREG: addr = &ram_offset; is64 = 1; break; default: @@ -1458,6 +1465,7 @@ static void do_readstub(int n) int i = stubs[n].a; int rs = stubs[n].b; const struct regstat *i_regs = (void *)stubs[n].c; + int adj = (int)stubs[n].d; u_int reglist = stubs[n].e; const signed char *i_regmap = i_regs->regmap; int rt; @@ -1520,12 +1528,22 @@ static void do_readstub(int n) handler=jump_handler_read32; assert(handler); pass_args64(rs,temp2); - int cc=get_reg(i_regmap,CCREG); - if(cc<0) - emit_loadreg(CCREG,2); - emit_addimm(cc<0?2:cc,(int)stubs[n].d,2); + int cc, cc_use; + cc = cc_use = get_reg(i_regmap, CCREG); + if (cc < 0) + emit_loadreg(CCREG, (cc_use = 2)); + emit_addimm(cc_use, adj, 2); + emit_far_call(handler); - // (no cycle reload after read) + +#if 0 + // cycle reload for read32 only (value in w2 both in and out) + if (type == LOADW_STUB) { + emit_addimm(2, -adj, cc_use); + if (cc < 0) + emit_storereg(CCREG, cc_use); + } +#endif if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) { loadstore_extend(type,0,rt); } @@ -1544,7 +1562,8 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, u_int is_dynamic=0; uintptr_t host_addr = 0; void *handler; - int cc=get_reg(regmap,CCREG); + int cc, cc_use; + cc = cc_use = get_reg(regmap, CCREG); //if(pcsx_direct_read(type,addr,adj,cc,target?ra:-1,rt)) // return; handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr); @@ -1581,9 +1600,9 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, emit_movimm(addr,0); else if(ra!=0) emit_mov(ra,0); - if(cc<0) - emit_loadreg(CCREG,2); - emit_addimm(cc<0?2:cc,adj,2); + if (cc < 0) + emit_loadreg(CCREG, (cc_use = 2)); + emit_addimm(cc_use, adj, 2); if(is_dynamic) { uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1; intptr_t offset = (l1 & ~0xfffl) - ((intptr_t)out & ~0xfffl); @@ -1599,7 +1618,16 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, emit_far_call(handler); - // (no cycle reload after read) +#if 0 + // cycle reload for read32 only (value in w2 both in and out) + if (type == LOADW_STUB) { + if (!is_dynamic) + emit_far_call(do_memhandler_post); + emit_addimm(2, -adj, cc_use); + if (cc < 0) + emit_storereg(CCREG, cc_use); + } +#endif if(rt>=0&&dops[i].rt1!=0) loadstore_extend(type, 0, rt); restore_regs(reglist); @@ -1613,6 +1641,7 @@ static void do_writestub(int n) int i=stubs[n].a; int rs=stubs[n].b; struct regstat *i_regs=(struct regstat *)stubs[n].c; + int adj = (int)stubs[n].d; u_int reglist=stubs[n].e; signed char *i_regmap=i_regs->regmap; int rt,r; @@ -1680,16 +1709,19 @@ static void do_writestub(int n) emit_mov64(temp2,3); host_tempreg_release(); } - int cc=get_reg(i_regmap,CCREG); - if(cc<0) - emit_loadreg(CCREG,2); - emit_addimm(cc<0?2:cc,(int)stubs[n].d,2); - // returns new cycle_count + int cc, cc_use; + cc = cc_use = get_reg(i_regmap, CCREG); + if (cc < 0) + emit_loadreg(CCREG, (cc_use = 2)); + emit_addimm(cc_use, adj, 2); + emit_far_call(handler); - emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc); - if(cc<0) - emit_storereg(CCREG,2); - if(restore_jump) + + // new cycle_count returned in x2 + emit_addimm(2, -adj, cc_use); + if (cc < 0) + emit_storereg(CCREG, cc_use); + if (restore_jump) set_jump_target(restore_jump, out); restore_regs(reglist); emit_jmp(stubs[n].retaddr); @@ -1729,7 +1761,7 @@ static void inline_writestub(enum stub_type type, int i, u_int addr, emit_far_call(do_memhandler_pre); emit_far_call(handler); emit_far_call(do_memhandler_post); - emit_addimm(0, -adj, cc_use); + emit_addimm(2, -adj, cc_use); if (cc < 0) emit_storereg(CCREG, cc_use); restore_regs(reglist); @@ -1943,6 +1975,65 @@ static void multdiv_assemble_arm64(int i, const struct regstat *i_regs) } #define multdiv_assemble multdiv_assemble_arm64 +// wb_dirtys making use of stp when possible +static void wb_dirtys(const signed char i_regmap[], u_int i_dirty) +{ + signed char mregs[34+1]; + int r, hr; + memset(mregs, -1, sizeof(mregs)); + for (hr = 0; hr < HOST_REGS; hr++) { + r = i_regmap[hr]; + if (hr == EXCLUDE_REG || r <= 0 || r == CCREG) + continue; + if (!((i_dirty >> hr) & 1)) + continue; + assert(r < 34u); + mregs[r] = hr; + } + for (r = 1; r < 34; r++) { + if (mregs[r] < 0) + continue; + if (mregs[r+1] >= 0) { + uintptr_t offset = (u_char *)&psxRegs.GPR.r[r] - (u_char *)&dynarec_local; + emit_ldstp(1, 0, mregs[r], mregs[r+1], FP, offset); + r++; + } + else + emit_storereg(r, mregs[r]); + } +} +#define wb_dirtys wb_dirtys + +static void load_all_regs(const signed char i_regmap[]) +{ + signed char mregs[34+1]; + int r, hr; + memset(mregs, -1, sizeof(mregs)); + for (hr = 0; hr < HOST_REGS; hr++) { + r = i_regmap[hr]; + if (hr == EXCLUDE_REG || r < 0 || r == CCREG) + continue; + if ((u_int)r < 34u) + mregs[r] = hr; + else if (r < TEMPREG) + emit_loadreg(r, hr); + } + if (mregs[0] >= 0) + emit_zeroreg(mregs[0]); // we could use arm64's ZR instead of reg alloc + for (r = 1; r < 34; r++) { + if (mregs[r] < 0) + continue; + if (mregs[r+1] >= 0) { + uintptr_t offset = (u_char *)&psxRegs.GPR.r[r] - (u_char *)&dynarec_local; + emit_ldstp(0, 0, mregs[r], mregs[r+1], FP, offset); + r++; + } + else + emit_loadreg(r, mregs[r]); + } +} +#define load_all_regs load_all_regs + static void do_jump_vaddr(u_int rs) { if (rs != 0)