rice: optimize some uv calculations
[mupen64plus-pandora.git] / source / gles2rice / src / RenderBase.cpp
index d5d2f7c..1ba7ec8 100644 (file)
@@ -635,6 +635,12 @@ void InitRenderBase()
         ProcessVertexData = ProcessVertexDataSSE;
     }
     else
+#elif defined(__ARM_NEON__)
+    if( !g_curRomInfo.bPrimaryDepthHack && options.enableHackForGames != HACK_FOR_NASCAR && options.enableHackForGames != HACK_FOR_ZELDA_MM && !options.bWinFrameMode)
+    {
+        ProcessVertexData = ProcessVertexDataNEON;
+    }
+    else
 #endif
     {
         ProcessVertexData = ProcessVertexDataNoSSE;
@@ -848,6 +854,17 @@ void ComputeLOD(bool openGL)
 bool bHalfTxtScale=false;
 extern uint32 lastSetTile;
 
+#ifndef __ARM_NEON__  // no NEON: build the portable C++ fallback defined right here
+static void multiply_subtract2(float *d, const float *m1, const float *m2, const float *s)  // d[i] = m1[i] * m2[i] - s[i] for i = 0 and 1
+{
+    int i;
+    for (i = 0; i < 2; i++)  // exactly two floats are read through m1, m2 and s, and two are written through d
+        d[i] = m1[i] * m2[i] - s[i];  // InitVertex passes e.g. &gRSP.tex0scaleX; presumably the Y member directly follows it in memory -- verify layout
+}
+#else  // NEON build: only declared here (C linkage); the definition is outside this file, presumably assembly with the same contract -- confirm
+extern "C" void multiply_subtract2(float *d, const float *m1, const float *m2, const float *s);
+#endif
+
 void InitVertex(uint32 dwV, uint32 vtxIndex, bool bTexture, bool openGL)
 {
     VTX_DUMP(TRACE2("Init vertex (%d) to vtx buf[%d]:", dwV, vtxIndex));
@@ -953,22 +970,22 @@ void InitVertex(uint32 dwV, uint32 vtxIndex, bool bTexture, bool openGL)
         }
         else
         {
-            float tex0u = g_fVtxTxtCoords[dwV].x *gRSP.tex0scaleX - gRSP.tex0OffsetX ;
-            float tex0v = g_fVtxTxtCoords[dwV].y *gRSP.tex0scaleY - gRSP.tex0OffsetY ;
+            TexCord tex0;
+            multiply_subtract2(&tex0.u, &g_fVtxTxtCoords[dwV].x, &gRSP.tex0scaleX, &gRSP.tex0OffsetX);
 
             if( CRender::g_pRender->IsTexel1Enable() )
             {
-                float tex1u = g_fVtxTxtCoords[dwV].x *gRSP.tex1scaleX - gRSP.tex1OffsetX ;
-                float tex1v = g_fVtxTxtCoords[dwV].y *gRSP.tex1scaleY - gRSP.tex1OffsetY ;
+                TexCord tex1;
+                multiply_subtract2(&tex1.u, &g_fVtxTxtCoords[dwV].x, &gRSP.tex1scaleX, &gRSP.tex1OffsetX);
 
-                CRender::g_pRender->SetVertexTextureUVCoord(v, tex0u, tex0v, tex1u, tex1v);
-                VTX_DUMP(TRACE2("  (tex0): %f, %f",  tex0u,tex0v));
-                VTX_DUMP(TRACE2("  (tex1): %f, %f",  tex1u,tex1v));
+                CRender::g_pRender->SetVertexTextureUVCoord(v, tex0, tex1);
+                VTX_DUMP(TRACE2("  (tex0): %f, %f",  tex0.u,tex0.v));
+                VTX_DUMP(TRACE2("  (tex1): %f, %f",  tex1.u,tex1.v));
             }
             else
             {
-                CRender::g_pRender->SetVertexTextureUVCoord(v, tex0u, tex0v);
-                VTX_DUMP(TRACE2("  (tex0): %f, %f",  tex0u,tex0v));
+                CRender::g_pRender->SetVertexTextureUVCoord(v, tex0);
+                VTX_DUMP(TRACE2("  (tex0): %f, %f",  tex0.u,tex0.v));
             }
         }
 
@@ -1522,6 +1539,168 @@ void ProcessVertexDataNoSSE(uint32 dwAddr, uint32 dwV0, uint32 dwNum)
     DEBUGGER_PAUSE_AND_DUMP(NEXT_VERTEX_CMD,{TRACE0("Paused at Vertex Cmd");});
 }
 
+/* NEON code */
+
+#include "RenderBase_neon.h"
+// pv_neon: batch vertex routine with C linkage, defined outside this file (presumably NEON assembly -- confirm); ProcessVertexDataNEON is the caller visible here.
+extern "C" void pv_neon(XVECTOR4 *g_vtxTransformed, XVECTOR4 *g_vecProjected,  // the caller passes &array[dwV0] for each of the six per-vertex arrays
+    uint32 *g_dwVtxDifColor, VECTOR2 *g_fVtxTxtCoords,
+    float *g_fFogCoord, uint32 *g_clipFlag2,
+    uint32 dwNum, int neon_state,  // number of vertices in the batch; mask built from the PV_NEON_* flags
+    const FiddledVtx *vtx,  // first source vertex of the batch
+    const Light *gRSPlights, const float *fRSPAmbientLightRGBA,  // the caller passes gRSP.fAmbientColors as the ambient array
+    const XMATRIX *gRSPworldProject, const XMATRIX *gRSPmodelViewTop,
+    uint32 gRSPnumLights, float gRSPfFogMin,
+    uint32 primitiveColor, uint32 primitiveColor_);  // NOTE(review): the caller passes gRDP.primitiveColor for both; why it is duplicated is not visible here
+// NEON vertex path (InitRenderBase installs it as ProcessVertexData on __ARM_NEON__ builds): reads dwNum FiddledVtx records at g_pRDRAMu8 + dwAddr and fills the per-vertex g_* arrays starting at index dwV0.
+void ProcessVertexDataNEON(uint32 dwAddr, uint32 dwV0, uint32 dwNum)
+{
+    if (gRSP.bTextureGen && gRSP.bLightingEnable) {  // this state combination is delegated to the generic NoSSE implementation
+        ProcessVertexDataNoSSE(dwAddr, dwV0,dwNum);
+        return;
+    }
+
+    // assumptions:
+    // - g_clipFlag is not used at all
+    // - g_fFogCoord is not used at all
+    // - g_vtxNonTransformed is not used after ProcessVertexData*() returns
+    // - g_normal - same
+    // Build the PV_NEON_* flag mask from the current RSP/RDP state; it is passed to pv_neon() below.
+    int neon_state = 0;
+    if ( gRSP.bLightingEnable )
+        neon_state |= PV_NEON_ENABLE_LIGHT;
+    if ( (gRDP.geometryMode & G_SHADE) || gRSP.ucode >= 5 )
+        neon_state |= PV_NEON_ENABLE_SHADE;
+    if ( gRSP.bFogEnabled )
+        neon_state |= PV_NEON_ENABLE_FOG;
+    if ( gRDP.geometryMode & G_FOG )
+        neon_state |= PV_NEON_FOG_ALPHA;
+
+    uint32 i;
+
+    UpdateCombinedMatrix();
+
+    // This function is called upon SPvertex
+    // - do vertex matrix transform
+    // - do vertex lighting
+    // - do texture coordinate transform if needed
+    // - calculate normal vector
+
+    // Output:  - g_vecProjected[i]             -> transformed vertex x,y,z
+    //          - g_vecProjected[i].w           -> saved vertex 1/w
+    //          - g_vtxTransformed[i]
+    //          - g_dwVtxDifColor[i]            -> vertex color
+    //          - g_fVtxTxtCoords[i]            -> vertex texture coordinates
+    //          - g_fFogCoord[i]                -> unused
+    //          - g_clipFlag2[i]
+
+    const FiddledVtx * pVtxBase = (const FiddledVtx*)(g_pRDRAMu8 + dwAddr);
+    g_pVtxBase = (FiddledVtx *)pVtxBase;  // also stored in g_pVtxBase, with the const qualifier cast away
+    // Zero _14/_24/_34 of the model-view matrix before it is handed to pv_neon(); this writes to gRSPmodelViewTop itself and is not undone here.
+    gRSPmodelViewTop._14 = gRSPmodelViewTop._24 =
+    gRSPmodelViewTop._34 = 0;
+
+    // SP_Timing(RSP_GBI0_Vtx);
+    status.SPCycleCount += Timing_RSP_GBI0_Vtx * dwNum;
+
+#if 1  // 1: call pv_neon(); the #else branch is a plain C++ loop that is not compiled (presumably the reference for what pv_neon computes -- confirm)
+    i = dwV0;
+    pv_neon(&g_vtxTransformed[i], &g_vecProjected[i],
+            &g_dwVtxDifColor[i], &g_fVtxTxtCoords[i],
+            &g_fFogCoord[i], &g_clipFlag2[i],
+            dwNum, neon_state, &pVtxBase[i - dwV0],  // i == dwV0 here, so this is &pVtxBase[0]
+            gRSPlights, gRSP.fAmbientColors,
+            &gRSPworldProject, &gRSPmodelViewTop,
+            gRSPnumLights, gRSPfFogMin,
+            gRDP.primitiveColor, gRDP.primitiveColor);  // the same value fills both primitiveColor parameters
+#else
+    for (i = dwV0; i < dwV0 + dwNum; i++)
+    {
+        const FiddledVtx & vert = pVtxBase[i - dwV0];
+        XVECTOR3 vtx_raw; // was g_vtxNonTransformed
+
+        vtx_raw.x = (float)vert.x;
+        vtx_raw.y = (float)vert.y;
+        vtx_raw.z = (float)vert.z;
+
+        Vec3Transform(&g_vtxTransformed[i], &vtx_raw, &gRSPworldProject); // Convert to w=1
+
+        g_vecProjected[i].w = 1.0f / g_vtxTransformed[i].w;
+        g_vecProjected[i].x = g_vtxTransformed[i].x * g_vecProjected[i].w;
+        g_vecProjected[i].y = g_vtxTransformed[i].y * g_vecProjected[i].w;
+        g_vecProjected[i].z = g_vtxTransformed[i].z * g_vecProjected[i].w;
+
+        // RSP_Vtx_Clipping(i);
+        g_clipFlag2[i] = 0;
+        if( g_vecProjected[i].w > 0 )
+        {
+            if( g_vecProjected[i].x > 1 )   g_clipFlag2[i] |= X_CLIP_MAX;
+            if( g_vecProjected[i].x < -1 )  g_clipFlag2[i] |= X_CLIP_MIN;
+            if( g_vecProjected[i].y > 1 )   g_clipFlag2[i] |= Y_CLIP_MAX;
+            if( g_vecProjected[i].y < -1 )  g_clipFlag2[i] |= Y_CLIP_MIN;
+        }
+
+        if( neon_state & PV_NEON_ENABLE_LIGHT )
+        {
+            XVECTOR3 normal; // was g_normal
+            float r, g, b;
+
+            normal.x = (float)vert.norma.nx;
+            normal.y = (float)vert.norma.ny;
+            normal.z = (float)vert.norma.nz;
+
+            Vec3TransformNormal(normal, gRSPmodelViewTop);
+
+            r = gRSP.fAmbientLightR;
+            g = gRSP.fAmbientLightG;
+            b = gRSP.fAmbientLightB;
+
+            for (unsigned int l=0; l < gRSPnumLights; l++)
+            {
+                float fCosT = normal.x * gRSPlights[l].x + normal.y * gRSPlights[l].y + normal.z * gRSPlights[l].z; 
+
+                if (fCosT > 0 )
+                {
+                    r += gRSPlights[l].fr * fCosT;
+                    g += gRSPlights[l].fg * fCosT;
+                    b += gRSPlights[l].fb * fCosT;
+                }
+            }
+            if (r > 255) r = 255;
+            if (g > 255) g = 255;
+            if (b > 255) b = 255;
+            g_dwVtxDifColor[i] = ((vert.rgba.a<<24)|(((uint32)r)<<16)|(((uint32)g)<<8)|((uint32)b));
+        }
+        else if( neon_state & PV_NEON_ENABLE_SHADE )
+        {
+            IColor &color = *(IColor*)&g_dwVtxDifColor[i];
+            color.b = vert.rgba.r;  // note: the source r field is stored into color.b ...
+            color.g = vert.rgba.g;
+            color.r = vert.rgba.b;  // ... and the source b field into color.r
+            color.a = vert.rgba.a;
+        }
+        else
+            g_dwVtxDifColor[i] = gRDP.primitiveColor; // FLAT shade
+
+        // ReplaceAlphaWithFogFactor(i);
+        if( neon_state & PV_NEON_FOG_ALPHA )
+        {
+            // Use fog factor to replace vertex alpha
+            if( g_vecProjected[i].z > 1 )
+                *(((uint8*)&(g_dwVtxDifColor[i]))+3) = 0xFF;
+            // missing 'else' in original code??
+            else if( g_vecProjected[i].z < 0 )
+                *(((uint8*)&(g_dwVtxDifColor[i]))+3) = 0;
+            else
+                *(((uint8*)&(g_dwVtxDifColor[i]))+3) = (uint8)(g_vecProjected[i].z*255);    
+        }
+
+        g_fVtxTxtCoords[i].x = (float)vert.tu;
+        g_fVtxTxtCoords[i].y = (float)vert.tv; 
+    }
+#endif
+}
+
 bool PrepareTriangle(uint32 dwV0, uint32 dwV1, uint32 dwV2)
 {
     if( status.isVertexShaderEnabled || status.bUseHW_T_L )