X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=blobdiff_plain;f=libpcsxcore%2Fnew_dynarec%2Fassem_arm.c;h=ebf733b09113d5b6bf2ca5722abba05112eddf86;hp=5373e7041921fd4649dba9c058bd9815b863696e;hb=cbbd8dd7705d5cb7c748a7ffaf2ccc74893b3910;hpb=27727b63bc3da8a9e0affc33fe9d3e0ad1b1d42d diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 5373e704..ebf733b0 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -20,6 +20,10 @@ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ #ifdef PCSX +#include "../gte.h" +#define FLAGLESS +#include "../gte.h" +#undef FLAGLESS #include "../gte_arm.h" #include "../gte_neon.h" #include "pcnt.h" @@ -1149,7 +1153,7 @@ void emit_addimm(u_int rs,int imm,u_int rt) assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm); output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval); }else if(genimm(-imm,&armval)) { - assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm); + assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm); output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval); }else if(imm<0) { assert(imm>-65536); @@ -2726,14 +2730,45 @@ emit_extjump_ds(int addr, int target) // put rt_val into rt, potentially making use of rs with value rs_val static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt) { - u_int xor=rs_val^rt_val; + u_int armval; + int diff; + if(genimm(rt_val,&armval)) { + assem_debug("mov %s,#%d\n",regname[rt],rt_val); + output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval); + return; + } + if(genimm(~rt_val,&armval)) { + assem_debug("mvn %s,#%d\n",regname[rt],rt_val); + output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval); + return; + } + diff=rt_val-rs_val; + if(genimm(diff,&armval)) { + assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff); + output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval); + return; + }else if(genimm(-diff,&armval)) { + assem_debug("sub 
%s,%s,#%d\n",regname[rt],regname[rs],-diff); + output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval); + return; + } + emit_movimm(rt_val,rt); +} + +// return 1 if above function can do it's job cheaply +static int is_similar_value(u_int v1,u_int v2) +{ u_int xs; - for(xs=xor;xs!=0&&(xs&3)==0;xs>>=2) + int diff; + if(v1==v2) return 1; + diff=v2-v1; + for(xs=diff;xs!=0&&(xs&3)==0;xs>>=2) ; - if(xs<0x100) - emit_xorimm(rs,xor,rt); - else - emit_movimm(rt_val,rt); + if(xs<0x100) return 1; + for(xs=-diff;xs!=0&&(xs&3)==0;xs>>=2) + ; + if(xs<0x100) return 1; + return 0; } // trashes r2 @@ -4180,10 +4215,10 @@ void cop0_assemble(int i,struct regstat *i_regs) emit_writeword(HOST_CCREG,(int)&last_count); emit_movimm(0,HOST_CCREG); emit_storereg(CCREG,HOST_CCREG); - if(s!=1) - emit_mov(s,1); + emit_loadreg(rs1[i],1); emit_movimm(copr,0); emit_call((int)pcsx_mtc0_ds); + emit_loadreg(rs1[i],s); return; } #endif @@ -4195,7 +4230,9 @@ void cop0_assemble(int i,struct regstat *i_regs) //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12); //else #ifdef PCSX - if(s!=1) + if(s==HOST_CCREG) + emit_loadreg(rs1[i],1); + else if(s!=1) emit_mov(s,1); emit_movimm(copr,0); emit_call((int)pcsx_mtc0); @@ -4204,23 +4241,21 @@ void cop0_assemble(int i,struct regstat *i_regs) #endif if(copr==9||copr==11||copr==12||copr==13) { emit_readword((int)&Count,HOST_CCREG); - emit_readword((int)&next_interupt,ECX); + emit_readword((int)&next_interupt,HOST_TEMPREG); emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG); - emit_sub(HOST_CCREG,ECX,HOST_CCREG); - emit_writeword(ECX,(int)&last_count); + emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG); + emit_writeword(HOST_TEMPREG,(int)&last_count); emit_storereg(CCREG,HOST_CCREG); } if(copr==12||copr==13) { assert(!is_delayslot); emit_readword((int)&pending_exception,14); + emit_test(14,14); + emit_jne((int)&do_interrupt); } emit_loadreg(rs1[i],s); if(get_reg(i_regs->regmap,rs1[i]|64)>=0) emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64)); - 
if(copr==12||copr==13) { - emit_test(14,14); - emit_jne((int)&do_interrupt); - } cop1_usable=0; } else @@ -4425,33 +4460,51 @@ static void c2op_epilogue(u_int op,u_int reglist) restore_regs_all(reglist); } +static void c2op_call_MACtoIR(int lm,int need_flags) +{ + if(need_flags) + emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0)); + else + emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf)); +} + +static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags) +{ + emit_call((int)func); + // func is C code and trashes r0 + emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); + if(need_flags||need_ir) + c2op_call_MACtoIR(lm,need_flags); + emit_call((int)(need_flags?gteMACtoRGB:gteMACtoRGB_nf)); +} + static void c2op_assemble(int i,struct regstat *i_regs) { signed char temp=get_reg(i_regs->regmap,-1); u_int c2op=source[i]&0x3f; - u_int hr,reglist=0; + u_int hr,reglist_full=0,reglist; int need_flags,need_ir; for(hr=0;hrregmap[hr]>=0) reglist|=1<regmap[hr]>=0) reglist_full|=1<>63); // +1 because of how liveness detection works need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00; - assem_debug("gte unneeded %016llx, need_flags %d, need_ir %d\n", - gte_unneeded[i+1],need_flags,need_ir); -#ifdef ARMv5_ONLY - // let's take more risk here - need_flags=need_flags&gte_reads_flags; -#endif + assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n", + source[i],gte_unneeded[i+1],need_flags,need_ir); + if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS) + need_flags=0; + int shift = (source[i] >> 19) & 1; + int lm = (source[i] >> 10) & 1; switch(c2op) { +#ifndef DRC_DBG case GTE_MVMVA: { - int shift = (source[i] >> 19) & 1; int v = (source[i] >> 15) & 3; int cv = (source[i] >> 13) & 3; int mx = (source[i] >> 17) & 3; - int lm = (source[i] >> 10) & 1; - reglist&=0x10ff; // +{r4-r7} + reglist=reglist_full&0x10ff; // +{r4-r7} c2op_prologue(c2op,reglist); /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */ if(v<3) @@ -4484,21 +4537,54 @@ static void 
c2op_assemble(int i,struct regstat *i_regs) emit_movimm(shift,1); emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm)); } - if(need_flags||need_ir) { - if(need_flags) - emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0)); - else - emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf)); // lm0 borked - } + if(need_flags||need_ir) + c2op_call_MACtoIR(lm,need_flags); #endif break; } - + case GTE_OP: + c2op_prologue(c2op,reglist); + emit_call((int)(shift?gteOP_part_shift:gteOP_part_noshift)); + if(need_flags||need_ir) { + emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); + c2op_call_MACtoIR(lm,need_flags); + } + break; + case GTE_DPCS: + c2op_prologue(c2op,reglist); + c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags); + break; + case GTE_INTPL: + c2op_prologue(c2op,reglist); + c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags); + break; + case GTE_SQR: + c2op_prologue(c2op,reglist); + emit_call((int)(shift?gteSQR_part_shift:gteSQR_part_noshift)); + if(need_flags||need_ir) { + emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); + c2op_call_MACtoIR(lm,need_flags); + } + break; + case GTE_DCPL: + c2op_prologue(c2op,reglist); + c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags); + break; + case GTE_GPF: + c2op_prologue(c2op,reglist); + c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags); + break; + case GTE_GPL: + c2op_prologue(c2op,reglist); + c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags); + break; +#endif default: - reglist&=0x100f; c2op_prologue(c2op,reglist); +#ifdef DRC_DBG emit_movimm(source[i],1); // opcode emit_writeword(1,(int)&psxRegs.code); +#endif emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op])); break; }