X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=blobdiff_plain;f=libpcsxcore%2Fnew_dynarec%2Fassem_arm.c;h=ebf733b09113d5b6bf2ca5722abba05112eddf86;hp=c0e41162cbf7cd1c159f84b83be78f1be19c06c9;hb=cbbd8dd7705d5cb7c748a7ffaf2ccc74893b3910;hpb=b1be1eeee94d3547c20719acfa6b0082404897f1 diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index c0e41162..ebf733b0 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -19,6 +19,16 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +#ifdef PCSX +#include "../gte.h" +#define FLAGLESS +#include "../gte.h" +#undef FLAGLESS +#include "../gte_arm.h" +#include "../gte_neon.h" +#include "pcnt.h" +#endif + extern int cycle_count; extern int last_count; extern int pcaddr; @@ -1143,7 +1153,7 @@ void emit_addimm(u_int rs,int imm,u_int rt) assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm); output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval); }else if(genimm(-imm,&armval)) { - assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm); + assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm); output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval); }else if(imm<0) { assert(imm>-65536); @@ -1924,6 +1934,16 @@ void emit_movzwl_indexed(int offset, int rs, int rt) output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf)); } } +static void emit_ldrd(int offset, int rs, int rt) +{ + assert(offset>-256&&offset<256); + assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset); + if(offset>=0) { + output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf)); + }else{ + output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf)); + } +} void emit_readword(int addr, int rt) { u_int offset = addr-(u_int)&dynarec_local; @@ -2568,34 +2588,40 @@ void emit_jno_unlikely(int a) output_w32(0x72800000|rd_rn_rm(15,15,0)); } -// Save registers before function call -void save_regs(u_int reglist) +static void save_regs_all(u_int reglist) { - reglist&=0x100f; // only save the caller-save registers, r0-r3, r12 + int i; if(!reglist) return; assem_debug("stmia fp,{"); - if(reglist&1) assem_debug("r0, "); - if(reglist&2) assem_debug("r1, "); - if(reglist&4) assem_debug("r2, "); - if(reglist&8) assem_debug("r3, "); - if(reglist&0x1000) assem_debug("r12"); + for(i=0;i<16;i++) + if(reglist&(1<>=2) + int diff; + if(v1==v2) return 1; + diff=v2-v1; + for(xs=diff;xs!=0&&(xs&3)==0;xs>>=2) ; - if(xs<0x100) - emit_xorimm(rs,xor,rt); - else - emit_movimm(rt_val,rt); + if(xs<0x100) return 1; + for(xs=-diff;xs!=0&&(xs&3)==0;xs>>=2) + ; + if(xs<0x100) return 1; + return 0; } // trashes r2 @@ -3175,7 +3244,6 @@ do_writestub(int n) set_jump_target(restore_jump,(int)out); restore_regs(reglist); ra=stubs[n][2]; - if(!restore_jump) ra+=4*3; // skip invcode check emit_jmp(ra); #else // if !PCSX if(addr<0) addr=get_reg(i_regmap,-1); @@ -4147,10 +4215,10 @@ void cop0_assemble(int i,struct regstat *i_regs) emit_writeword(HOST_CCREG,(int)&last_count); emit_movimm(0,HOST_CCREG); emit_storereg(CCREG,HOST_CCREG); - if(s!=1) - emit_mov(s,1); + emit_loadreg(rs1[i],1); emit_movimm(copr,0); emit_call((int)pcsx_mtc0_ds); + emit_loadreg(rs1[i],s); return; } #endif @@ -4162,7 +4230,9 @@ void cop0_assemble(int i,struct regstat *i_regs) //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12); //else #ifdef PCSX - if(s!=1) + if(s==HOST_CCREG) + emit_loadreg(rs1[i],1); + else if(s!=1) emit_mov(s,1); emit_movimm(copr,0); emit_call((int)pcsx_mtc0); @@ -4171,23 +4241,21 @@ void cop0_assemble(int i,struct regstat *i_regs) #endif if(copr==9||copr==11||copr==12||copr==13) { emit_readword((int)&Count,HOST_CCREG); - emit_readword((int)&next_interupt,ECX); + emit_readword((int)&next_interupt,HOST_TEMPREG); emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG); - emit_sub(HOST_CCREG,ECX,HOST_CCREG); - emit_writeword(ECX,(int)&last_count); + emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG); + emit_writeword(HOST_TEMPREG,(int)&last_count); emit_storereg(CCREG,HOST_CCREG); } if(copr==12||copr==13) { assert(!is_delayslot); emit_readword((int)&pending_exception,14); + emit_test(14,14); + emit_jne((int)&do_interrupt); } emit_loadreg(rs1[i],s); if(get_reg(i_regs->regmap,rs1[i]|64)>=0) emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64)); - if(copr==12||copr==13) { - emit_test(14,14); - emit_jne((int)&do_interrupt); - } cop1_usable=0; } else @@ -4373,36 +4441,155 @@ void cop2_assemble(int i,struct regstat *i_regs) } } -void c2op_assemble(int i,struct regstat *i_regs) +static void c2op_prologue(u_int op,u_int reglist) +{ + save_regs_all(reglist); +#ifdef PCNT + emit_movimm(op,0); + emit_call((int)pcnt_gte_start); +#endif + emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs +} + +static void c2op_epilogue(u_int op,u_int reglist) +{ +#ifdef PCNT + emit_movimm(op,0); + emit_call((int)pcnt_gte_end); +#endif + restore_regs_all(reglist); +} + +static void c2op_call_MACtoIR(int lm,int need_flags) +{ + if(need_flags) + emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0)); + else + emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf)); +} + +static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags) +{ + emit_call((int)func); + // func is C code and trashes r0 + emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); + if(need_flags||need_ir) + c2op_call_MACtoIR(lm,need_flags); + emit_call((int)(need_flags?gteMACtoRGB:gteMACtoRGB_nf)); +} + +static void c2op_assemble(int i,struct regstat *i_regs) { signed char temp=get_reg(i_regs->regmap,-1); u_int c2op=source[i]&0x3f; - u_int hr,reglist=0; - int need_flags; + u_int hr,reglist_full=0,reglist; + int need_flags,need_ir; for(hr=0;hrregmap[hr]>=0) reglist|=1<regmap[hr]>=0) reglist_full|=1<regmap,CCREG); - emit_movimm(source[i],1); // opcode - if (cc>=0&>e_cycletab[c2op]) - emit_addimm(cc,gte_cycletab[c2op]/2,cc); // XXX: could just adjust ccadj? - emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs - emit_writeword(1,(int)&psxRegs.code); need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works - assem_debug("gte unneeded %016llx, need_flags %d\n",gte_unneeded[i+1],need_flags); -#ifdef ARMv5_ONLY - // let's take more risk here - need_flags=need_flags&>e_reads_flags; + need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00; + assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n", + source[i],gte_unneeded[i+1],need_flags,need_ir); + if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS) + need_flags=0; + int shift = (source[i] >> 19) & 1; + int lm = (source[i] >> 10) & 1; + switch(c2op) { +#ifndef DRC_DBG + case GTE_MVMVA: { + int v = (source[i] >> 15) & 3; + int cv = (source[i] >> 13) & 3; + int mx = (source[i] >> 17) & 3; + reglist=reglist_full&0x10ff; // +{r4-r7} + c2op_prologue(c2op,reglist); + /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */ + if(v<3) + emit_ldrd(v*8,0,4); + else { + emit_movzwl_indexed(9*4,0,4); // gteIR + emit_movzwl_indexed(10*4,0,6); + emit_movzwl_indexed(11*4,0,5); + emit_orrshl_imm(6,16,4); + } + if(mx<3) + emit_addimm(0,32*4+mx*8*4,6); + else + emit_readword((int)&zeromem_ptr,6); + if(cv<3) + emit_addimm(0,32*4+(cv*8+5)*4,7); + else + emit_readword((int)&zeromem_ptr,7); +#ifdef __ARM_NEON__ + emit_movimm(source[i],1); // opcode + emit_call((int)gteMVMVA_part_neon); + if(need_flags) { + emit_movimm(lm,1); + emit_call((int)gteMACtoIR_flags_neon); + } +#else + if(cv==3&&shift) + emit_call((int)gteMVMVA_part_cv3sh12_arm); + else { + emit_movimm(shift,1); + emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm)); + } + if(need_flags||need_ir) + c2op_call_MACtoIR(lm,need_flags); +#endif + break; + } + case GTE_OP: + c2op_prologue(c2op,reglist); + emit_call((int)(shift?gteOP_part_shift:gteOP_part_noshift)); + if(need_flags||need_ir) { + emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); + c2op_call_MACtoIR(lm,need_flags); + } + break; + case GTE_DPCS: + c2op_prologue(c2op,reglist); + c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags); + break; + case GTE_INTPL: + c2op_prologue(c2op,reglist); + c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags); + break; + case GTE_SQR: + c2op_prologue(c2op,reglist); + emit_call((int)(shift?gteSQR_part_shift:gteSQR_part_noshift)); + if(need_flags||need_ir) { + emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); + c2op_call_MACtoIR(lm,need_flags); + } + break; + case GTE_DCPL: + c2op_prologue(c2op,reglist); + c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags); + break; + case GTE_GPF: + c2op_prologue(c2op,reglist); + c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags); + break; + case GTE_GPL: + c2op_prologue(c2op,reglist); + c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags); + break; #endif - emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op])); + default: + c2op_prologue(c2op,reglist); +#ifdef DRC_DBG + emit_movimm(source[i],1); // opcode + emit_writeword(1,(int)&psxRegs.code); +#endif + emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op])); + break; + } + c2op_epilogue(c2op,reglist); } - - if(i>=slen-1||itype[i+1]!=C2OP) - restore_regs(reglist); } void cop1_unusable(int i,struct regstat *i_regs)