X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=blobdiff_plain;f=libpcsxcore%2Fnew_dynarec%2Fassem_arm.c;h=9440bb827846d1dfd3796bd3ed711d2d75728484;hp=6524d1fc4833670f5f2a6a2008884ff7c3d0a394;hb=0ff8c62ced8c9a920ac208c6d965b138c5c124dd;hpb=8a0a84238e0a50c5b5d83b06d11985aeb21d9681 diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 6524d1fc..9440bb82 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -20,6 +20,10 @@ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ #ifdef PCSX +#include "../gte.h" +#define FLAGLESS +#include "../gte.h" +#undef FLAGLESS #include "../gte_arm.h" #include "../gte_neon.h" #include "pcnt.h" @@ -2726,14 +2730,45 @@ emit_extjump_ds(int addr, int target) // put rt_val into rt, potentially making use of rs with value rs_val static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt) { - u_int xor=rs_val^rt_val; + u_int armval; + int diff; + if(genimm(rt_val,&armval)) { + assem_debug("mov %s,#%d\n",regname[rt],rt_val); + output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval); + return; + } + if(genimm(~rt_val,&armval)) { + assem_debug("mvn %s,#%d\n",regname[rt],rt_val); + output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval); + return; + } + diff=rt_val-rs_val; + if(genimm(diff,&armval)) { + assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff); + output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval); + return; + }else if(genimm(-diff,&armval)) { + assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff); + output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval); + return; + } + emit_movimm(rt_val,rt); +} + +// return 1 if above function can do it's job cheaply +static int is_similar_value(u_int v1,u_int v2) +{ u_int xs; - for(xs=xor;xs!=0&&(xs&3)==0;xs>>=2) + int diff; + if(v1==v2) return 1; + diff=v2-v1; + for(xs=diff;xs!=0&&(xs&3)==0;xs>>=2) ; - if(xs<0x100) - emit_xorimm(rs,xor,rt); - else - emit_movimm(rt_val,rt); + if(xs<0x100) return 1; + for(xs=-diff;xs!=0&&(xs&3)==0;xs>>=2) + ; + if(xs<0x100) return 1; + return 0; } // trashes r2 @@ -4425,33 +4460,50 @@ static void c2op_epilogue(u_int op,u_int reglist) restore_regs_all(reglist); } +static void c2op_call_MACtoIR(int lm,int need_flags) +{ + if(need_flags) + emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0)); + else + emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf)); +} + +static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags) +{ + emit_call((int)func); + // func is C code and trashes r0 + emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); + if(need_flags||need_ir) + c2op_call_MACtoIR(lm,need_flags); + emit_call((int)(need_flags?gteMACtoRGB:gteMACtoRGB_nf)); +} + static void c2op_assemble(int i,struct regstat *i_regs) { signed char temp=get_reg(i_regs->regmap,-1); u_int c2op=source[i]&0x3f; - u_int hr,reglist=0; + u_int hr,reglist_full=0,reglist; int need_flags,need_ir; for(hr=0;hrregmap[hr]>=0) reglist|=1<regmap[hr]>=0) reglist_full|=1<>63); // +1 because of how liveness detection works need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00; assem_debug("gte unneeded %016llx, need_flags %d, need_ir %d\n", gte_unneeded[i+1],need_flags,need_ir); -#ifdef ARMv5_ONLY - // let's take more risk here - need_flags=need_flags&>e_reads_flags; -#endif + if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS) + need_flags=0; + int shift = (source[i] >> 19) & 1; + int lm = (source[i] >> 10) & 1; switch(c2op) { case GTE_MVMVA: { - int shift = (source[i] >> 19) & 1; int v = (source[i] >> 15) & 3; int cv = (source[i] >> 13) & 3; int mx = (source[i] >> 17) & 3; - int lm = (source[i] >> 10) & 1; - reglist&=0x10ff; // +{r4-r7} + reglist=reglist_full&0x10ff; // +{r4-r7} c2op_prologue(c2op,reglist); /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */ if(v<3) @@ -4484,21 +4536,52 @@ static void c2op_assemble(int i,struct regstat *i_regs) emit_movimm(shift,1); emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm)); } - if(need_flags||need_ir) { - if(need_flags) - emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0)); - else - emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf)); // lm0 borked - } + if(need_flags||need_ir) + c2op_call_MACtoIR(lm,need_flags); #endif break; } + case GTE_OP: + c2op_prologue(c2op,reglist); + emit_call((int)(shift?gteOP_part_shift:gteOP_part_noshift)); + if(need_flags||need_ir) { + emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); + c2op_call_MACtoIR(lm,need_flags); + } + break; + case GTE_DPCS: + c2op_prologue(c2op,reglist); + c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags); + break; + case GTE_INTPL: + c2op_prologue(c2op,reglist); + c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags); + break; + case GTE_SQR: + c2op_prologue(c2op,reglist); + emit_call((int)(shift?gteSQR_part_shift:gteSQR_part_noshift)); + if(need_flags||need_ir) { + emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); + c2op_call_MACtoIR(lm,need_flags); + } + break; + case GTE_DCPL: + c2op_prologue(c2op,reglist); + c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags); + break; + case GTE_GPF: + c2op_prologue(c2op,reglist); + c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags); + break; + case GTE_GPL: + c2op_prologue(c2op,reglist); + c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags); + break; default: - reglist&=0x100f; c2op_prologue(c2op,reglist); - emit_movimm(source[i],1); // opcode - emit_writeword(1,(int)&psxRegs.code); + //emit_movimm(source[i],1); // opcode + //emit_writeword(1,(int)&psxRegs.code); emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op])); break; }