X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=blobdiff_plain;f=libpcsxcore%2Fnew_dynarec%2Fassem_arm.c;h=f2c2efa7148f12a7a5cf4b963281deb453cfc2c8;hp=6524d1fc4833670f5f2a6a2008884ff7c3d0a394;hb=4a35de071887026bb6dcd6b852738a1866959df7;hpb=8a0a84238e0a50c5b5d83b06d11985aeb21d9681 diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 6524d1fc..f2c2efa7 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -20,11 +20,19 @@ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ #ifdef PCSX +#include "../gte.h" +#define FLAGLESS +#include "../gte.h" +#undef FLAGLESS #include "../gte_arm.h" #include "../gte_neon.h" #include "pcnt.h" #endif +#if !BASE_ADDR_FIXED +char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096))); +#endif + extern int cycle_count; extern int last_count; extern int pcaddr; @@ -284,6 +292,7 @@ int isclean(int addr) return 1; } +// get source that block at addr was compiled from (host pointers) void get_bounds(int addr,u_int *start,u_int *end) { u_int *ptr=(u_int *)addr; @@ -2726,14 +2735,45 @@ emit_extjump_ds(int addr, int target) // put rt_val into rt, potentially making use of rs with value rs_val static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt) { - u_int xor=rs_val^rt_val; + u_int armval; + int diff; + if(genimm(rt_val,&armval)) { + assem_debug("mov %s,#%d\n",regname[rt],rt_val); + output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval); + return; + } + if(genimm(~rt_val,&armval)) { + assem_debug("mvn %s,#%d\n",regname[rt],rt_val); + output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval); + return; + } + diff=rt_val-rs_val; + if(genimm(diff,&armval)) { + assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff); + output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval); + return; + }else if(genimm(-diff,&armval)) { + assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff); + output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval); + return; + } + emit_movimm(rt_val,rt); +} + +// return 1 if above function can do it's job cheaply +static int is_similar_value(u_int v1,u_int v2) +{ u_int xs; - for(xs=xor;xs!=0&&(xs&3)==0;xs>>=2) + int diff; + if(v1==v2) return 1; + diff=v2-v1; + for(xs=diff;xs!=0&&(xs&3)==0;xs>>=2) ; - if(xs<0x100) - emit_xorimm(rs,xor,rt); - else - emit_movimm(rt_val,rt); + if(xs<0x100) return 1; + for(xs=-diff;xs!=0&&(xs&3)==0;xs>>=2) + ; + if(xs<0x100) return 1; + return 0; } // trashes r2 @@ -2800,7 +2840,7 @@ do_readstub(int n) temp=r; break; } } - if(rt>=0) + if(rt>=0&&rt1[i]!=0) reglist&=~(1<=0) + if(rt>=0&&rt1[i]!=0) reglist&=~(1<=0) { + if(rt>=0&&rt1[i]!=0) { switch(type) { case LOADB_STUB: emit_signextend8(0,rt); break; case LOADBU_STUB: emit_andimm(0,0xff,rt); break; @@ -3962,6 +4002,10 @@ static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override) else #endif emit_jno(0); + if(ram_offset!=0) { + emit_addimm(addr,ram_offset,HOST_TEMPREG); + addr=*addr_reg_override=HOST_TEMPREG; + } } return jaddr; @@ -4013,6 +4057,10 @@ void loadlr_assemble_arm(int i,struct regstat *i_regs) jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override); } else { + if(ram_offset&&memtarget) { + emit_addimm(temp2,ram_offset,HOST_TEMPREG); + fastload_reg_override=HOST_TEMPREG; + } if (opcode[i]==0x22||opcode[i]==0x26) { emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR }else{ @@ -4180,10 +4228,10 @@ void cop0_assemble(int i,struct regstat *i_regs) emit_writeword(HOST_CCREG,(int)&last_count); emit_movimm(0,HOST_CCREG); emit_storereg(CCREG,HOST_CCREG); - if(s!=1) - emit_mov(s,1); + emit_loadreg(rs1[i],1); emit_movimm(copr,0); emit_call((int)pcsx_mtc0_ds); + emit_loadreg(rs1[i],s); return; } #endif @@ -4195,7 +4243,9 @@ void cop0_assemble(int i,struct regstat *i_regs) //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12); //else #ifdef PCSX - if(s!=1) + if(s==HOST_CCREG) + emit_loadreg(rs1[i],1); + else if(s!=1) emit_mov(s,1); emit_movimm(copr,0); emit_call((int)pcsx_mtc0); @@ -4204,23 +4254,21 @@ void cop0_assemble(int i,struct regstat *i_regs) #endif if(copr==9||copr==11||copr==12||copr==13) { emit_readword((int)&Count,HOST_CCREG); - emit_readword((int)&next_interupt,ECX); + emit_readword((int)&next_interupt,HOST_TEMPREG); emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG); - emit_sub(HOST_CCREG,ECX,HOST_CCREG); - emit_writeword(ECX,(int)&last_count); + emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG); + emit_writeword(HOST_TEMPREG,(int)&last_count); emit_storereg(CCREG,HOST_CCREG); } if(copr==12||copr==13) { assert(!is_delayslot); emit_readword((int)&pending_exception,14); + emit_test(14,14); + emit_jne((int)&do_interrupt); } emit_loadreg(rs1[i],s); if(get_reg(i_regs->regmap,rs1[i]|64)>=0) emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64)); - if(copr==12||copr==13) { - emit_test(14,14); - emit_jne((int)&do_interrupt); - } cop1_usable=0; } else @@ -4425,33 +4473,51 @@ static void c2op_epilogue(u_int op,u_int reglist) restore_regs_all(reglist); } +static void c2op_call_MACtoIR(int lm,int need_flags) +{ + if(need_flags) + emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0)); + else + emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf)); +} + +static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags) +{ + emit_call((int)func); + // func is C code and trashes r0 + emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); + if(need_flags||need_ir) + c2op_call_MACtoIR(lm,need_flags); + emit_call((int)(need_flags?gteMACtoRGB:gteMACtoRGB_nf)); +} + static void c2op_assemble(int i,struct regstat *i_regs) { signed char temp=get_reg(i_regs->regmap,-1); u_int c2op=source[i]&0x3f; - u_int hr,reglist=0; + u_int hr,reglist_full=0,reglist; int need_flags,need_ir; for(hr=0;hrregmap[hr]>=0) reglist|=1<regmap[hr]>=0) reglist_full|=1<>63); // +1 because of how liveness detection works need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00; - assem_debug("gte unneeded %016llx, need_flags %d, need_ir %d\n", - gte_unneeded[i+1],need_flags,need_ir); -#ifdef ARMv5_ONLY - // let's take more risk here - need_flags=need_flags&>e_reads_flags; -#endif + assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n", + source[i],gte_unneeded[i+1],need_flags,need_ir); + if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS) + need_flags=0; + int shift = (source[i] >> 19) & 1; + int lm = (source[i] >> 10) & 1; switch(c2op) { +#ifndef DRC_DBG case GTE_MVMVA: { - int shift = (source[i] >> 19) & 1; int v = (source[i] >> 15) & 3; int cv = (source[i] >> 13) & 3; int mx = (source[i] >> 17) & 3; - int lm = (source[i] >> 10) & 1; - reglist&=0x10ff; // +{r4-r7} + reglist=reglist_full&0x10ff; // +{r4-r7} c2op_prologue(c2op,reglist); /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */ if(v<3) @@ -4484,21 +4550,54 @@ static void c2op_assemble(int i,struct regstat *i_regs) emit_movimm(shift,1); emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm)); } - if(need_flags||need_ir) { - if(need_flags) - emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0)); - else - emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf)); // lm0 borked - } + if(need_flags||need_ir) + c2op_call_MACtoIR(lm,need_flags); #endif break; } - + case GTE_OP: + c2op_prologue(c2op,reglist); + emit_call((int)(shift?gteOP_part_shift:gteOP_part_noshift)); + if(need_flags||need_ir) { + emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); + c2op_call_MACtoIR(lm,need_flags); + } + break; + case GTE_DPCS: + c2op_prologue(c2op,reglist); + c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags); + break; + case GTE_INTPL: + c2op_prologue(c2op,reglist); + c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags); + break; + case GTE_SQR: + c2op_prologue(c2op,reglist); + emit_call((int)(shift?gteSQR_part_shift:gteSQR_part_noshift)); + if(need_flags||need_ir) { + emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); + c2op_call_MACtoIR(lm,need_flags); + } + break; + case GTE_DCPL: + c2op_prologue(c2op,reglist); + c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags); + break; + case GTE_GPF: + c2op_prologue(c2op,reglist); + c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags); + break; + case GTE_GPL: + c2op_prologue(c2op,reglist); + c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags); + break; +#endif default: - reglist&=0x100f; c2op_prologue(c2op,reglist); +#ifdef DRC_DBG emit_movimm(source[i],1); // opcode emit_writeword(1,(int)&psxRegs.code); +#endif emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op])); break; } @@ -5594,7 +5693,7 @@ void do_clear_cache() for(j=0;j<32;j++) { if(bitmap&(1<