at least the ones from C source, it looks like there could be more.
#include "gte.h"
#include "psxmem.h"
+typedef struct psxCP2Regs { /* given a struct tag so gte.h can forward-declare 'struct psxCP2Regs' */
+ psxCP2Data CP2D; /* Cop2 data registers */
+ psxCP2Ctrl CP2C; /* Cop2 control registers */
+} psxCP2Regs;
+
#define VX(n) (n < 3 ? regs->CP2D.p[n << 1].sw.l : regs->CP2D.p[9].sw.l)
#define VY(n) (n < 3 ? regs->CP2D.p[n << 1].sw.h : regs->CP2D.p[10].sw.l)
#define VZ(n) (n < 3 ? regs->CP2D.p[(n << 1) + 1].sw.l : regs->CP2D.p[11].sw.l)
gteG2 = limC2(gteMAC2 >> 4);
gteB2 = limC3(gteMAC3 >> 4);
}
+
+/* decomposed/parametrized versions for the recompiler */
+
+#ifndef FLAGLESS
+
+void gteSQR_part_noshift(psxCP2Regs *regs) { /* SQR core without the >>12: MACn = IRn * IRn for n = 1..3 */
+ gteFLAG = 0; /* reset gteFLAG before computing */
+
+ gteMAC1 = gteIR1 * gteIR1; /* only MAC1..3 are written here; the recompiler emits the MAC->IR call separately */
+ gteMAC2 = gteIR2 * gteIR2;
+ gteMAC3 = gteIR3 * gteIR3;
+}
+
+void gteSQR_part_shift(psxCP2Regs *regs) { /* SQR core with shift: MACn = (IRn * IRn) >> 12 for n = 1..3 */
+ gteFLAG = 0; /* reset gteFLAG before computing */
+
+ gteMAC1 = (gteIR1 * gteIR1) >> 12; /* only MAC1..3 are written here; IR is updated by a separate MAC->IR call */
+ gteMAC2 = (gteIR2 * gteIR2) >> 12;
+ gteMAC3 = (gteIR3 * gteIR3) >> 12;
+}
+
+void gteOP_part_noshift(psxCP2Regs *regs) { /* OP core without the >>12: MAC1..3 = (R11,R22,R33) x (IR1,IR2,IR3) */
+ gteFLAG = 0; /* reset gteFLAG before computing */
+
+ gteMAC1 = (gteR22 * gteIR3) - (gteR33 * gteIR2); /* x component of the cross product */
+ gteMAC2 = (gteR33 * gteIR1) - (gteR11 * gteIR3); /* y component */
+ gteMAC3 = (gteR11 * gteIR2) - (gteR22 * gteIR1); /* z component */
+}
+
+void gteOP_part_shift(psxCP2Regs *regs) { /* OP core with shift: same cross product as the noshift variant, each component >> 12 */
+ gteFLAG = 0; /* reset gteFLAG before computing */
+
+ gteMAC1 = ((gteR22 * gteIR3) - (gteR33 * gteIR2)) >> 12; /* the difference is formed first, then shifted */
+ gteMAC2 = ((gteR33 * gteIR1) - (gteR11 * gteIR3)) >> 12;
+ gteMAC3 = ((gteR11 * gteIR2) - (gteR22 * gteIR1)) >> 12;
+}
+
+void gteDCPL_part(psxCP2Regs *regs) { /* DCPL core: MACn = c*IRn/256 + (IR0 * lim(farcolor - c*IRn/256)) >> 12, c = R,G,B */
+ s32 RIR1 = ((s32)gteR * gteIR1) >> 8; /* per-channel colour * IR product, scaled down by 8 bits */
+ s32 GIR2 = ((s32)gteG * gteIR2) >> 8;
+ s32 BIR3 = ((s32)gteB * gteIR3) >> 8;
+
+ gteFLAG = 0; /* reset after the locals are computed, before any limiter runs */
+
+ gteMAC1 = RIR1 + ((gteIR0 * limB1(A1U((s64)gteRFC - RIR1), 0)) >> 12);
+ gteMAC2 = GIR2 + ((gteIR0 * limB1(A2U((s64)gteGFC - GIR2), 0)) >> 12); /* NOTE(review): limB1 here, while DPCS/INTPL use limB2 for this channel - confirm intended */
+ gteMAC3 = BIR3 + ((gteIR0 * limB1(A3U((s64)gteBFC - BIR3), 0)) >> 12); /* NOTE(review): limB1 here, while DPCS/INTPL use limB3 for this channel - confirm intended */
+}
+
+void gteGPF_part_noshift(psxCP2Regs *regs) { /* GPF core without the >>12: MACn = IR0 * IRn for n = 1..3 */
+ gteFLAG = 0; /* reset gteFLAG before computing */
+
+ gteMAC1 = gteIR0 * gteIR1; /* only MAC1..3 are written; MAC->IR and MAC->RGB are separate calls in the recompiler */
+ gteMAC2 = gteIR0 * gteIR2;
+ gteMAC3 = gteIR0 * gteIR3;
+}
+
+void gteGPF_part_shift(psxCP2Regs *regs) { /* GPF core with shift: MACn = (IR0 * IRn) >> 12 for n = 1..3 */
+ gteFLAG = 0; /* reset gteFLAG before computing */
+
+ gteMAC1 = (gteIR0 * gteIR1) >> 12; /* only MAC1..3 are written; MAC->IR and MAC->RGB are separate calls in the recompiler */
+ gteMAC2 = (gteIR0 * gteIR2) >> 12;
+ gteMAC3 = (gteIR0 * gteIR3) >> 12;
+}
+
+#endif // !FLAGLESS
+
+void gteGPL_part_noshift(psxCP2Regs *regs) { /* GPL core without the >>12: MACn = An(MACn + IR0 * IRn); sits outside the #ifndef FLAGLESS section */
+ gteFLAG = 0; /* reset gteFLAG before the A1..A3 checks run */
+
+ gteMAC1 = A1((s64)gteMAC1 + (gteIR0 * gteIR1)); /* accumulate onto the existing MAC value in 64 bits */
+ gteMAC2 = A2((s64)gteMAC2 + (gteIR0 * gteIR2));
+ gteMAC3 = A3((s64)gteMAC3 + (gteIR0 * gteIR3));
+}
+
+void gteGPL_part_shift(psxCP2Regs *regs) { /* GPL core with shift: MACn = An(MACn + ((IR0 * IRn) >> 12)) */
+ gteFLAG = 0; /* reset gteFLAG before the A1..A3 checks run */
+
+ gteMAC1 = A1((s64)gteMAC1 + ((gteIR0 * gteIR1) >> 12)); /* only the product is shifted, not the accumulated MAC value */
+ gteMAC2 = A2((s64)gteMAC2 + ((gteIR0 * gteIR2) >> 12));
+ gteMAC3 = A3((s64)gteMAC3 + ((gteIR0 * gteIR3) >> 12));
+}
+
+void gteDPCS_part_noshift(psxCP2Regs *regs) { /* DPCS core with shift fixed at 0; body is textually identical to the _shift variant */
+ int shift = 0; /* makes (12 - shift) == 12, so the limB input is shifted left by 12 */
+
+ gteFLAG = 0; /* reset gteFLAG before the limiters run */
+
+ gteMAC1 = ((gteR << 16) + (gteIR0 * limB1(A1U((s64)gteRFC - (gteR << 4)) << (12 - shift), 0))) >> 12; /* colour<<16 plus IR0 * limited (far colour - colour<<4), then >>12 */
+ gteMAC2 = ((gteG << 16) + (gteIR0 * limB2(A2U((s64)gteGFC - (gteG << 4)) << (12 - shift), 0))) >> 12;
+ gteMAC3 = ((gteB << 16) + (gteIR0 * limB3(A3U((s64)gteBFC - (gteB << 4)) << (12 - shift), 0))) >> 12;
+}
+
+void gteDPCS_part_shift(psxCP2Regs *regs) { /* DPCS core with shift fixed at 12; body is textually identical to the _noshift variant */
+ int shift = 12; /* makes (12 - shift) == 0, so the limB input is not shifted left */
+
+ gteFLAG = 0; /* reset gteFLAG before the limiters run */
+
+ gteMAC1 = ((gteR << 16) + (gteIR0 * limB1(A1U((s64)gteRFC - (gteR << 4)) << (12 - shift), 0))) >> 12; /* colour<<16 plus IR0 * limited (far colour - colour<<4), then >>12 */
+ gteMAC2 = ((gteG << 16) + (gteIR0 * limB2(A2U((s64)gteGFC - (gteG << 4)) << (12 - shift), 0))) >> 12;
+ gteMAC3 = ((gteB << 16) + (gteIR0 * limB3(A3U((s64)gteBFC - (gteB << 4)) << (12 - shift), 0))) >> 12;
+}
+
+void gteINTPL_part_noshift(psxCP2Regs *regs) { /* INTPL core without the final >>12: MACn = (IRn << 12) + IR0 * lim(far colour - IRn) */
+ gteFLAG = 0; /* reset gteFLAG before the limiters run */
+
+ gteMAC1 = ((gteIR1 << 12) + (gteIR0 * limB1(A1U((s64)gteRFC - gteIR1), 0))); /* differs from the _shift variant only by the missing trailing >> 12 */
+ gteMAC2 = ((gteIR2 << 12) + (gteIR0 * limB2(A2U((s64)gteGFC - gteIR2), 0)));
+ gteMAC3 = ((gteIR3 << 12) + (gteIR0 * limB3(A3U((s64)gteBFC - gteIR3), 0)));
+}
+
+void gteINTPL_part_shift(psxCP2Regs *regs) { /* INTPL core with shift: MACn = ((IRn << 12) + IR0 * lim(far colour - IRn)) >> 12 */
+ gteFLAG = 0; /* reset gteFLAG before the limiters run */
+
+ gteMAC1 = ((gteIR1 << 12) + (gteIR0 * limB1(A1U((s64)gteRFC - gteIR1), 0))) >> 12; /* the whole sum is shifted, not just the product */
+ gteMAC2 = ((gteIR2 << 12) + (gteIR0 * limB2(A2U((s64)gteGFC - gteIR2), 0))) >> 12;
+ gteMAC3 = ((gteIR3 << 12) + (gteIR0 * limB3(A3U((s64)gteBFC - gteIR3), 0))) >> 12;
+}
+
+void gteMACtoRGB(psxCP2Regs *regs) { /* shared tail of the colour ops: shift the RGB entries down and build entry 2 from MAC1..3 */
+ gteRGB0 = gteRGB1; /* entry 1 moves to entry 0 */
+ gteRGB1 = gteRGB2; /* entry 2 moves to entry 1 */
+ gteCODE2 = gteCODE; /* new entry 2 takes the current CODE value */
+ gteR2 = limC1(gteMAC1 >> 4); /* each channel is MACn >> 4 passed through its limCn helper */
+ gteG2 = limC2(gteMAC2 >> 4);
+ gteB2 = limC3(gteMAC3 >> 4);
+}
+
#define gteGPL gteGPL_nf
#define gteNCCT gteNCCT_nf
+#define gteGPL_part_noshift gteGPL_part_noshift_nf
+#define gteGPL_part_shift gteGPL_part_shift_nf
+#define gteDPCS_part_noshift gteDPCS_part_noshift_nf
+#define gteDPCS_part_shift gteDPCS_part_shift_nf
+#define gteINTPL_part_noshift gteINTPL_part_noshift_nf
+#define gteINTPL_part_shift gteINTPL_part_shift_nf
+#define gteMACtoRGB gteMACtoRGB_nf
+
+#undef __GTE_H__
#endif
#ifndef __GTE_H__
#include "psxcommon.h"
#include "r3000a.h"
-typedef struct {
- psxCP2Data CP2D; /* Cop2 data registers */
- psxCP2Ctrl CP2C; /* Cop2 control registers */
-} psxCP2Regs;
+struct psxCP2Regs;
void gteMFC2();
void gteCFC2();
void gteLWC2();
void gteSWC2();
-void gteRTPS(psxCP2Regs *regs);
-void gteOP(psxCP2Regs *regs);
-void gteNCLIP(psxCP2Regs *regs);
-void gteDPCS(psxCP2Regs *regs);
-void gteINTPL(psxCP2Regs *regs);
-void gteMVMVA(psxCP2Regs *regs);
-void gteNCDS(psxCP2Regs *regs);
-void gteNCDT(psxCP2Regs *regs);
-void gteCDP(psxCP2Regs *regs);
-void gteNCCS(psxCP2Regs *regs);
-void gteCC(psxCP2Regs *regs);
-void gteNCS(psxCP2Regs *regs);
-void gteNCT(psxCP2Regs *regs);
-void gteSQR(psxCP2Regs *regs);
-void gteDCPL(psxCP2Regs *regs);
-void gteDPCT(psxCP2Regs *regs);
-void gteAVSZ3(psxCP2Regs *regs);
-void gteAVSZ4(psxCP2Regs *regs);
-void gteRTPT(psxCP2Regs *regs);
-void gteGPF(psxCP2Regs *regs);
-void gteGPL(psxCP2Regs *regs);
-void gteNCCT(psxCP2Regs *regs);
+void gteRTPS(struct psxCP2Regs *regs);
+void gteOP(struct psxCP2Regs *regs);
+void gteNCLIP(struct psxCP2Regs *regs);
+void gteDPCS(struct psxCP2Regs *regs);
+void gteINTPL(struct psxCP2Regs *regs);
+void gteMVMVA(struct psxCP2Regs *regs);
+void gteNCDS(struct psxCP2Regs *regs);
+void gteNCDT(struct psxCP2Regs *regs);
+void gteCDP(struct psxCP2Regs *regs);
+void gteNCCS(struct psxCP2Regs *regs);
+void gteCC(struct psxCP2Regs *regs);
+void gteNCS(struct psxCP2Regs *regs);
+void gteNCT(struct psxCP2Regs *regs);
+void gteSQR(struct psxCP2Regs *regs);
+void gteDCPL(struct psxCP2Regs *regs);
+void gteDPCT(struct psxCP2Regs *regs);
+void gteAVSZ3(struct psxCP2Regs *regs);
+void gteAVSZ4(struct psxCP2Regs *regs);
+void gteRTPT(struct psxCP2Regs *regs);
+void gteGPF(struct psxCP2Regs *regs);
+void gteGPL(struct psxCP2Regs *regs);
+void gteNCCT(struct psxCP2Regs *regs);
+
+void gteSQR_part_noshift(struct psxCP2Regs *regs); /* this group is defined only inside '#ifndef FLAGLESS' in the GTE source */
+void gteSQR_part_shift(struct psxCP2Regs *regs);
+void gteOP_part_noshift(struct psxCP2Regs *regs);
+void gteOP_part_shift(struct psxCP2Regs *regs);
+void gteDCPL_part(struct psxCP2Regs *regs);
+void gteGPF_part_noshift(struct psxCP2Regs *regs);
+void gteGPF_part_shift(struct psxCP2Regs *regs);
+
+void gteGPL_part_noshift(struct psxCP2Regs *regs); /* this group is also renamed to *_nf by the #define list next to gteGPL/gteNCCT - presumably under FLAGLESS, confirm */
+void gteGPL_part_shift(struct psxCP2Regs *regs);
+void gteDPCS_part_noshift(struct psxCP2Regs *regs);
+void gteDPCS_part_shift(struct psxCP2Regs *regs);
+void gteINTPL_part_noshift(struct psxCP2Regs *regs);
+void gteINTPL_part_shift(struct psxCP2Regs *regs);
+void gteMACtoRGB(struct psxCP2Regs *regs);
#ifdef __cplusplus
}
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
#ifdef PCSX
+#include "../gte.h"
+#define FLAGLESS
+#include "../gte.h"
+#undef FLAGLESS
#include "../gte_arm.h"
#include "../gte_neon.h"
#include "pcnt.h"
restore_regs_all(reglist);
}
+static void c2op_call_MACtoIR(int lm,int need_flags) /* emit a call to the MAC->IR helper selected by lm and need_flags */
+{
+ if(need_flags)
+ emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0)); /* flag-setting variants: lm!=0 picks _lm1, else _lm0 */
+ else
+ emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf)); /* _nf variants, used when flags are not needed */
+}
+
+static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags) /* emit: call func, optional MAC->IR call, then the MAC->RGB call */
+{
+ emit_call((int)func); /* func is the per-op *_part helper chosen by the caller */
+ // func is C code and trashes r0
+ emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); /* recompute FP + offset of psxRegs.CP2D.r[0] into reg 0 - presumably the regs argument for the next calls */
+ if(need_flags||need_ir) /* the MAC->IR step is skipped when neither flags nor IR are needed */
+ c2op_call_MACtoIR(lm,need_flags);
+ emit_call((int)(need_flags?gteMACtoRGB:gteMACtoRGB_nf)); /* always emitted; NOTE(review): r0 is not reloaded after the MACtoIR call - assumes those helpers preserve it, confirm */
+}
+
static void c2op_assemble(int i,struct regstat *i_regs)
{
signed char temp=get_reg(i_regs->regmap,-1);
u_int c2op=source[i]&0x3f;
- u_int hr,reglist=0;
+ u_int hr,reglist_full=0,reglist;
int need_flags,need_ir;
for(hr=0;hr<HOST_REGS;hr++) {
- if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
+ if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
}
+ reglist=reglist_full&0x100f;
if (gte_handlers[c2op]!=NULL) {
need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
// let's take more risk here
need_flags=need_flags&&gte_reads_flags;
#endif
+ int shift = (source[i] >> 19) & 1;
+ int lm = (source[i] >> 10) & 1;
switch(c2op) {
case GTE_MVMVA: {
- int shift = (source[i] >> 19) & 1;
int v = (source[i] >> 15) & 3;
int cv = (source[i] >> 13) & 3;
int mx = (source[i] >> 17) & 3;
- int lm = (source[i] >> 10) & 1;
- reglist&=0x10ff; // +{r4-r7}
+ reglist=reglist_full&0x10ff; // +{r4-r7}
c2op_prologue(c2op,reglist);
/* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
if(v<3)
emit_movimm(shift,1);
emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
}
- if(need_flags||need_ir) {
- if(need_flags)
- emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0));
- else
- emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf)); // lm0 borked
- }
+ if(need_flags||need_ir)
+ c2op_call_MACtoIR(lm,need_flags);
#endif
break;
}
+ case GTE_OP:
+ c2op_prologue(c2op,reglist);
+ emit_call((int)(shift?gteOP_part_shift:gteOP_part_noshift));
+ if(need_flags||need_ir) {
+ emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
+ c2op_call_MACtoIR(lm,need_flags);
+ }
+ break;
+ case GTE_DPCS:
+ c2op_prologue(c2op,reglist);
+ c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
+ break;
+ case GTE_INTPL:
+ c2op_prologue(c2op,reglist);
+ c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
+ break;
+ case GTE_SQR:
+ c2op_prologue(c2op,reglist);
+ emit_call((int)(shift?gteSQR_part_shift:gteSQR_part_noshift));
+ if(need_flags||need_ir) {
+ emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
+ c2op_call_MACtoIR(lm,need_flags);
+ }
+ break;
+ case GTE_DCPL:
+ c2op_prologue(c2op,reglist);
+ c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
+ break;
+ case GTE_GPF:
+ c2op_prologue(c2op,reglist);
+ c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
+ break;
+ case GTE_GPL:
+ c2op_prologue(c2op,reglist);
+ c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
+ break;
default:
- reglist&=0x100f;
c2op_prologue(c2op,reglist);
- emit_movimm(source[i],1); // opcode
- emit_writeword(1,(int)&psxRegs.code);
+ //emit_movimm(source[i],1); // opcode
+ //emit_writeword(1,(int)&psxRegs.code);
emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
break;
}
void (*psxSPC[64])();
void (*psxREG[32])();
void (*psxCP0[32])();
-void (*psxCP2[64])(psxCP2Regs *regs);
+void (*psxCP2[64])(struct psxCP2Regs *regs);
void (*psxCP2BSC[32])();
static void delayRead(int reg, u32 bpc) {
}
void psxCOP2() {
- psxCP2[_Funct_]((psxCP2Regs *)&psxRegs.CP2D);
+ psxCP2[_Funct_]((struct psxCP2Regs *)&psxRegs.CP2D); /* dispatch on _Funct_; NOTE(review): cast assumes psxRegs.CP2D is directly followed by the control registers, matching struct psxCP2Regs - confirm */
}
-void psxBASIC(psxCP2Regs *regs) {
+void psxBASIC(struct psxCP2Regs *regs) { /* psxCP2[] slot 0: regs is unused, the call is forwarded to psxCP2BSC[_Rs_] */
psxCP2BSC[_Rs_]();
}
psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL
};
-void (*psxCP2[64])(psxCP2Regs *regs) = {
+void (*psxCP2[64])(struct psxCP2Regs *regs) = {
psxBASIC, gteRTPS , psxNULL , psxNULL, psxNULL, psxNULL , gteNCLIP, psxNULL, // 00
psxNULL , psxNULL , psxNULL , psxNULL, gteOP , psxNULL , psxNULL , psxNULL, // 08
gteDPCS , gteINTPL, gteMVMVA, gteNCDS, gteCDP , psxNULL , gteNCDT , psxNULL, // 10