bool bHalfTxtScale=false;
extern uint32 lastSetTile;
+#ifndef __ARM_NEON__ // __ARM_NEON__ not defined: compile the plain C++ version below
+static void multiply_subtract2(float *d, const float *m1, const float *m2, const float *s) // element-wise d[i] = m1[i] * m2[i] - s[i] for i = 0 and 1
+{
+ int i;
+ for (i = 0; i < 2; i++) // exactly two floats are read through m1, m2, s and two are written through d
+ d[i] = m1[i] * m2[i] - s[i]; // callers pass the address of the first float of a pair (e.g. &tex0.u); assumes its partner is stored directly after it - TODO confirm struct layout
+}
+#else // __ARM_NEON__ defined: only declared here, the body is supplied elsewhere with C linkage
+extern "C" void multiply_subtract2(float *d, const float *m1, const float *m2, const float *s); // NOTE(review): presumably a NEON routine producing the same result as the fallback above - confirm
+#endif // __ARM_NEON__
+
void InitVertex(uint32 dwV, uint32 vtxIndex, bool bTexture, bool openGL)
{
VTX_DUMP(TRACE2("Init vertex (%d) to vtx buf[%d]:", dwV, vtxIndex));
}
else
{
- float tex0u = g_fVtxTxtCoords[dwV].x *gRSP.tex0scaleX - gRSP.tex0OffsetX ;
- float tex0v = g_fVtxTxtCoords[dwV].y *gRSP.tex0scaleY - gRSP.tex0OffsetY ;
+ TexCord tex0;
+ multiply_subtract2(&tex0.u, &g_fVtxTxtCoords[dwV].x, &gRSP.tex0scaleX, &gRSP.tex0OffsetX);
if( CRender::g_pRender->IsTexel1Enable() )
{
- float tex1u = g_fVtxTxtCoords[dwV].x *gRSP.tex1scaleX - gRSP.tex1OffsetX ;
- float tex1v = g_fVtxTxtCoords[dwV].y *gRSP.tex1scaleY - gRSP.tex1OffsetY ;
+ TexCord tex1;
+ multiply_subtract2(&tex1.u, &g_fVtxTxtCoords[dwV].x, &gRSP.tex1scaleX, &gRSP.tex1OffsetX);
- CRender::g_pRender->SetVertexTextureUVCoord(v, tex0u, tex0v, tex1u, tex1v);
- VTX_DUMP(TRACE2(" (tex0): %f, %f", tex0u,tex0v));
- VTX_DUMP(TRACE2(" (tex1): %f, %f", tex1u,tex1v));
+ CRender::g_pRender->SetVertexTextureUVCoord(v, tex0, tex1);
+ VTX_DUMP(TRACE2(" (tex0): %f, %f", tex0.u,tex0.v));
+ VTX_DUMP(TRACE2(" (tex1): %f, %f", tex1.u,tex1.v));
}
else
{
- CRender::g_pRender->SetVertexTextureUVCoord(v, tex0u, tex0v);
- VTX_DUMP(TRACE2(" (tex0): %f, %f", tex0u,tex0v));
+ CRender::g_pRender->SetVertexTextureUVCoord(v, tex0);
+ VTX_DUMP(TRACE2(" (tex0): %f, %f", tex0.u,tex0.v));
}
}
uint32 gRSPnumLights, float gRSPfFogMin,
uint32 primitiveColor, uint32 primitiveColor_);
-// debug
-//#define DO_CMP
-#ifdef DO_CMP
-// note: don't forget -fno-associative-math
-static XVECTOR4 n_transformed[2], n_projected[2];
-static uint32 n_color[2];
-static VECTOR2 n_vtxcoords[2];
-static float n_fogcoord[2];
-static uint32 n_clipflag2[2];
-
-static int do_cmp_f(void *a, void *b, int c)
-{
- int *ia = (int *)a, *ib = (int *)b;
- for (int i = 0; i < c; i++) {
- int di = abs(ia[i] - ib[i]);
- if (di > 7) {
- printf("di: %d\n", di);
- return 1;
- }
- }
- return 0;
-}
-
-static int do_cmp_c(uint32 a, uint32 b)
-{
- if (abs(((a >> 0) & 0xff) - ((b >> 0) & 0xff)) > 1)
- return 1;
- if (abs(((a >> 8) & 0xff) - ((b >> 8) & 0xff)) > 1)
- return 1;
- if (abs(((a >> 16) & 0xff) - ((b >> 16) & 0xff)) > 1)
- return 1;
- if (abs(((a >> 24) & 0xff) - ((b >> 24) & 0xff)) > 1)
- return 1;
-
- return 0;
-}
-
-static void do_cmp(int i, int s, int neon_state)
-{
- static int ccnt;
- int bad = 0;
-
- // if (memcmp(&n_transformed, &g_vtxTransformed[i], sizeof(XVECTOR4)))
- if (do_cmp_f(&n_transformed[s], &g_vtxTransformed[i], 4)) {
- printf("transformed:\n%13.8e %13.8e %13.8e %13.8e\n"
- "%13.8e %13.8e %13.8e %13.8e\n",
- n_transformed[s].x, n_transformed[s].y,
- n_transformed[s].z, n_transformed[s].w,
- g_vtxTransformed[i].x, g_vtxTransformed[i].y,
- g_vtxTransformed[i].z, g_vtxTransformed[i].w);
- bad = 1;
- }
- if (do_cmp_f(&n_projected[s], &g_vecProjected[i], 4)) {
- printf("projected:\n%13.8e %13.8e %13.8e %13.8e |%08x\n"
- "%13.8e %13.8e %13.8e %13.8e |%08x\n",
- n_projected[s].x, n_projected[s].y,
- n_projected[s].z, n_projected[s].w,
- *(uint32 *)&n_projected[s].w,
- g_vecProjected[i].x, g_vecProjected[i].y,
- g_vecProjected[i].z, g_vecProjected[i].w,
- *(uint32 *)&g_vecProjected[i].w);
- bad = 1;
- }
- if (n_vtxcoords[s].x != g_fVtxTxtCoords[i].x
- || n_vtxcoords[s].y != g_fVtxTxtCoords[i].y)
- {
- printf("vtxcoords:\n%13.8e %13.8e\n%13.8e %13.8e\n",
- n_vtxcoords[s].x, n_vtxcoords[s].y,
- g_fVtxTxtCoords[i].x, g_fVtxTxtCoords[i].y);
- bad = 1;
- }
- if (n_clipflag2[s] != g_clipFlag2[i]) {
- printf("clipflag2: %08x %08x\n", n_clipflag2[s], g_clipFlag2[i]);
- bad = 1;
- }
- if (do_cmp_c(n_color[s], g_dwVtxDifColor[i])) {
- printf("n_color: %08x %08x\n", n_color[s], g_dwVtxDifColor[i]);
- bad = 1;
- }
- if (!(neon_state & PV_NEON_ENABLE_SHADE))
- printf("!ENABLE_SHADE!\n");
- if (bad) {
- printf("%d s=%d, state %02x\n", ccnt, s, neon_state);
- printf(".w %08x %08x\n",
- *(uint32 *)&n_projected[s].w, *(uint32 *)&g_vecProjected[i].w);
- exit(ccnt);
- }
- ccnt++;
-}
-#endif
+extern "C" int tv_direction(const XVECTOR4 *v0, const XVECTOR4 *v1, const XVECTOR4 *v2); // external C-linkage routine; declared int although the call site notes it really returns float and only the sign is used. NOTE(review): this relies on the float result arriving in the integer return register, and a -0.0f result would then read as negative - confirm ABI and that this is acceptable
void ProcessVertexDataNEON(uint32 dwAddr, uint32 dwV0, uint32 dwNum)
{
neon_state |= PV_NEON_FOG_ALPHA;
uint32 i;
-#ifdef DO_CMP
- uint32 s = 0;
-#endif
UpdateCombinedMatrix();
// SP_Timing(RSP_GBI0_Vtx);
status.SPCycleCount += Timing_RSP_GBI0_Vtx * dwNum;
-//#define DO_CC
-#ifdef DO_CC
- asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(i));
- i |= 5; // master enable, ccnt reset
- i &= ~8; // ccnt divider 0
- asm volatile("mcr p15, 0, %0, c9, c12, 0" :: "r"(i));
- // enable cycle counter
- asm volatile("mcr p15, 0, %0, c9, c12, 1" :: "r"(1<<31));
- unsigned int cc_start;
- asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(cc_start));
-#endif
-
#if 1
i = dwV0;
pv_neon(&g_vtxTransformed[i], &g_vecProjected[i],
#else
for (i = dwV0; i < dwV0 + dwNum; i++)
{
-#ifdef DO_CMP
- if (!(s & 1))
- pv_neon(n_transformed, n_projected,
- n_color, n_vtxcoords,
- n_fogcoord, n_clipflag2,
- 1, neon_state, &pVtxBase[i - dwV0],
- gRSPlights, gRSP.fAmbientColors,
- &gRSPworldProject, &gRSPmodelViewTop,
- gRSPnumLights, gRSPfFogMin,
- gRDP.primitiveColor, gRDP.primitiveColor);
-#endif
-
const FiddledVtx & vert = pVtxBase[i - dwV0];
XVECTOR3 vtx_raw; // was g_vtxNonTransformed
g_fVtxTxtCoords[i].x = (float)vert.tu;
g_fVtxTxtCoords[i].y = (float)vert.tv;
-#ifdef DO_CMP
- do_cmp(i, s++ & 1, neon_state);
-#endif
- }
-#endif
-#ifdef DO_CC
- static int total, total_c;
- unsigned int cc;
- asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(cc));
- total += cc - cc_start;
- total_c += dwNum;
- if (total_c > 20000) {
- printf("%.u\n", total / total_c);
- total = total_c = 0;
}
#endif
}
// method doesn't work well when the z value is outside of screenspace
//if (v0.z < 1 && v1.z < 1 && v2.z < 1)
{
+#ifndef __ARM_NEON__
float V1 = v2.x - v0.x;
float V2 = v2.y - v0.y;
float fDirection = (V1 * W2) - (V2 * W1);
fDirection = fDirection * v1.w * v2.w * v0.w;
//float fDirection = v0.x*v1.y-v1.x*v0.y+v1.x*v2.y-v2.x*v1.y+v2.x*v0.y-v0.x*v2.y;
+#else
+ // really returns float, but we only need sign
+ int fDirection = tv_direction(&v0, &v1, &v2);
+#endif
if (fDirection < 0 && gRSP.bCullBack)
{