From bedfea3863c3c48699048ea0d6dd07893221403c Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 25 Sep 2011 22:45:35 +0300 Subject: [PATCH] drc: do gte flag liveness detection just copy-paste existing code and use _nf handlers from previous patch. --- libpcsxcore/gte.c | 6 +- libpcsxcore/new_dynarec/assem_arm.c | 9 ++- libpcsxcore/new_dynarec/emu_if.c | 11 ++++ libpcsxcore/new_dynarec/emu_if.h | 1 + libpcsxcore/new_dynarec/new_dynarec.c | 81 +++++++++++++++++++++++++-- 5 files changed, 100 insertions(+), 8 deletions(-) diff --git a/libpcsxcore/gte.c b/libpcsxcore/gte.c index 0acca653..16e0a89c 100644 --- a/libpcsxcore/gte.c +++ b/libpcsxcore/gte.c @@ -212,7 +212,11 @@ static inline s32 LIM(s32 value, s32 max, s32 min, u32 flag_unused) { return ret; } -#define limE(a) ((a) & 0x1ffff) +static inline u32 limE(u32 result) { + if (result > 0x1ffff) + return 0x1ffff; + return result; +} #endif diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 2a0a2146..adbde599 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -3796,6 +3796,7 @@ void c2op_assemble(int i,struct regstat *i_regs) signed char temp=get_reg(i_regs->regmap,-1); u_int c2op=source[i]&0x3f; u_int hr,reglist=0; + int need_flags; for(hr=0;hrregmap[hr]>=0) reglist|=1<>63); // +1 because of how liveness detection works + assem_debug("gte unneeded %016llx, need_flags %d\n",gte_unneeded[i+1],need_flags); +#ifdef ARMv5_ONLY + // let's take more risk here + need_flags=need_flags&>e_reads_flags; +#endif + emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op])); } if(i>=slen-1||itype[i+1]!=C2OP) diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index fbd4f964..f65e5bd5 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -154,6 +154,17 @@ void *gte_handlers_nf[64] = { NULL , NULL , NULL , NULL , NULL , gteGPF_nf , gteGPL_nf , gteNCCT_nf, // 38 }; +const char *gte_regnames[64] = { + NULL , "RTPS" , NULL , NULL , NULL , NULL , "NCLIP", NULL , // 00 + NULL , NULL , NULL , NULL , "OP" , NULL , NULL , NULL , // 08 + "DPCS", "INTPL", "MVMVA", "NCDS", "CDP", NULL , "NCDT" , NULL , // 10 + NULL , NULL , NULL , "NCCS", "CC" , NULL , "NCS" , NULL , // 18 + "NCT" , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20 + "SQR" , "DCPL" , "DPCT" , NULL , NULL , "AVSZ3", "AVSZ4", NULL , // 28 + "RTPT", NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30 + NULL , NULL , NULL , NULL , NULL , "GPF" , "GPL" , "NCCT", // 38 +}; + /* from gte.txt.. not sure if this is any good. */ const char gte_cycletab[64] = { /* 1 2 3 4 5 6 7 8 9 a b c d e f */ diff --git a/libpcsxcore/new_dynarec/emu_if.h b/libpcsxcore/new_dynarec/emu_if.h index e16cca54..88749bec 100644 --- a/libpcsxcore/new_dynarec/emu_if.h +++ b/libpcsxcore/new_dynarec/emu_if.h @@ -31,6 +31,7 @@ extern int reg_cop0[]; extern int reg_cop2d[], reg_cop2c[]; extern void *gte_handlers[64]; extern void *gte_handlers_nf[64]; +extern const char *gte_regnames[64]; extern const char gte_cycletab[64]; /* dummy */ diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 666b4d44..716b1d41 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -80,6 +80,10 @@ struct ll_entry u_char dep1[MAXBLOCK]; u_char dep2[MAXBLOCK]; u_char lt1[MAXBLOCK]; + static uint64_t gte_rs[MAXBLOCK]; // gte: 32 data and 32 ctl regs + static uint64_t gte_rt[MAXBLOCK]; + static uint64_t gte_unneeded[MAXBLOCK]; + static int gte_reads_flags; // gte flag read encountered int imm[MAXBLOCK]; u_int ba[MAXBLOCK]; char likely[MAXBLOCK]; @@ -6710,8 +6714,8 @@ static void pagespan_ds() void unneeded_registers(int istart,int iend,int r) { int i; - uint64_t u,uu,b,bu; - uint64_t temp_u,temp_uu; + uint64_t u,uu,gte_u,b,bu,gte_bu; + uint64_t temp_u,temp_uu,temp_gte_u; uint64_t tdep; if(iend==slen-1) { u=1;uu=1; @@ -6720,6 +6724,8 @@ void unneeded_registers(int istart,int iend,int r) uu=unneeded_reg_upper[iend+1]; u=1;uu=1; } + gte_u=temp_gte_u=0; + for (i=iend;i>=istart;i--) { //printf("unneeded registers i=%d (%d,%d) r=%d\n",i,istart,iend,r); @@ -6733,6 +6739,7 @@ void unneeded_registers(int istart,int iend,int r) // Branch out of this block, flush all regs u=1; uu=1; + gte_u=0; /* Hexagon hack if(itype[i]==UJUMP&&rt1[i]==31) { @@ -6764,17 +6771,21 @@ void unneeded_registers(int istart,int iend,int r) uu&=~((1LL<>rt1[i+1])&1; @@ -6801,17 +6814,21 @@ void unneeded_registers(int istart,int iend,int r) temp_uu&=~((1LL<>rt1[i])&1; @@ -6821,8 +6838,11 @@ void unneeded_registers(int istart,int iend,int r) temp_uu&=~((1LL<>2]=1; unneeded_reg_upper[(ba[i]-start)>>2]=1; + gte_unneeded[(ba[i]-start)>>2]=0; } } /*else*/ if(1) { if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000) @@ -6837,6 +6858,7 @@ void unneeded_registers(int istart,int iend,int r) // Unconditional branch u=unneeded_reg[(ba[i]-start)>>2]; uu=unneeded_reg_upper[(ba[i]-start)>>2]; + gte_u=gte_unneeded[(ba[i]-start)>>2]; branch_unneeded_reg[i]=u; branch_unneeded_reg_upper[i]=uu; //u=1; @@ -6851,10 +6873,13 @@ void unneeded_registers(int istart,int iend,int r) uu&=~((1LL<>2]; bu=unneeded_reg_upper[(ba[i]-start)>>2]; + gte_bu=gte_unneeded[(ba[i]-start)>>2]; branch_unneeded_reg[i]=b; branch_unneeded_reg_upper[i]=bu; //b=1; @@ -6869,20 +6894,25 @@ void unneeded_registers(int istart,int iend,int r) bu&=~((1LL<>21)&0x1f; - if (source[i]&0x3f) { + //if (op2 & 0x10) { + if (source[i]&0x3f) { // use this hack to support old savestates with patched gte insns if (gte_handlers[source[i]&0x3f]!=NULL) { - snprintf(insn[i], sizeof(insn[i]), "COP2 %x", source[i]&0x3f); + if (gte_regnames[source[i]&0x3f]!=NULL) + strcpy(insn[i],gte_regnames[source[i]&0x3f]); + else + snprintf(insn[i], sizeof(insn[i]), "COP2 %x", source[i]&0x3f); type=C2OP; } } @@ -8376,6 +8413,7 @@ int new_recompile_block(int addr) us2[i]=0; dep1[i]=0; dep2[i]=0; + gte_rs[i]=gte_rt[i]=0; switch(type) { case LOAD: rs1[i]=(source[i]>>21)&0x1f; @@ -8539,7 +8577,6 @@ int new_recompile_block(int addr) if(op2==16) if((source[i]&0x3f)==0x18) rs2[i]=CCREG; // ERET break; case COP1: - case COP2: rs1[i]=0; rs2[i]=0; rt1[i]=0; @@ -8549,6 +8586,28 @@ int new_recompile_block(int addr) if(op2==5) us1[i]=rs1[i]; // DMTC1 rs2[i]=CSREG; break; + case COP2: + rs1[i]=0; + rs2[i]=0; + rt1[i]=0; + rt2[i]=0; + if(op2<3) rt1[i]=(source[i]>>16)&0x1F; // MFC2/CFC2 + if(op2>3) rs1[i]=(source[i]>>16)&0x1F; // MTC2/CTC2 + rs2[i]=CSREG; + int gr=(source[i]>>11)&0x1F; + switch(op2) + { + case 0x00: gte_rs[i]=1ll<>21)&0x1F; rs2[i]=CSREG; @@ -8562,6 +8621,16 @@ int new_recompile_block(int addr) rt1[i]=0; rt2[i]=0; imm[i]=(short)source[i]; + if(op==0x32) gte_rt[i]=1ll<<((source[i]>>16)&0x1F); // LWC2 + else gte_rs[i]=1ll<<((source[i]>>16)&0x1F); // SWC2 + break; + case C2OP: + rs1[i]=0; + rs2[i]=0; + rt1[i]=0; + rt2[i]=0; + gte_rt[i]=1ll<<63; // every op changes flags + // TODO: other regs? break; case FLOAT: case FCONV: -- 2.39.5