From: notaz
Date: Mon, 23 Jun 2014 00:09:02 +0000 (+0300)
Subject: rice: optimize IsTriangleVisible
X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3db2a2f9c2702f2f19530333a1f45651c9984a63;p=mupen64plus-pandora.git

rice: optimize IsTriangleVisible
---

diff --git a/source/gles2rice/src/RenderBase.cpp b/source/gles2rice/src/RenderBase.cpp
index 1ba7ec8..9e95147 100644
--- a/source/gles2rice/src/RenderBase.cpp
+++ b/source/gles2rice/src/RenderBase.cpp
@@ -1553,6 +1553,8 @@ extern "C" void pv_neon(XVECTOR4 *g_vtxTransformed, XVECTOR4 *g_vecProjected,
         uint32 gRSPnumLights, float gRSPfFogMin,
         uint32 primitiveColor, uint32 primitiveColor_);
 
+extern "C" int tv_direction(const XVECTOR4 *v0, const XVECTOR4 *v1, const XVECTOR4 *v2);
+
 void ProcessVertexDataNEON(uint32 dwAddr, uint32 dwV0, uint32 dwNum)
 {
     if (gRSP.bTextureGen && gRSP.bLightingEnable) {
@@ -1761,6 +1763,7 @@ bool IsTriangleVisible(uint32 dwV0, uint32 dwV1, uint32 dwV2)
     // method doesnt' work well when the z value is outside of screenspace
     //if (v0.z < 1 && v1.z < 1 && v2.z < 1)
     {
+#ifndef __ARM_NEON__
         float V1 = v2.x - v0.x;
         float V2 = v2.y - v0.y;
 
@@ -1770,6 +1773,10 @@ bool IsTriangleVisible(uint32 dwV0, uint32 dwV1, uint32 dwV2)
         float fDirection = (V1 * W2) - (V2 * W1);
         fDirection = fDirection * v1.w * v2.w * v0.w;
         //float fDirection = v0.x*v1.y-v1.x*v0.y+v1.x*v2.y-v2.x*v1.y+v2.x*v0.y-v0.x*v2.y;
+#else
+        // really returns float, but we only need sign
+        int fDirection = tv_direction(&v0, &v1, &v2);
+#endif
 
         if (fDirection < 0 && gRSP.bCullBack)
         {
diff --git a/source/gles2rice/src/RenderBase_neon.S b/source/gles2rice/src/RenderBase_neon.S
index 3e60c58..4310947 100644
--- a/source/gles2rice/src/RenderBase_neon.S
+++ b/source/gles2rice/src/RenderBase_neon.S
@@ -310,4 +310,23 @@ FUNCTION(multiply_subtract2):
     .size       multiply_subtract2, .-multiply_subtract2
 
 
+@ (const XVECTOR4 *v0, const XVECTOR4 *v1, const XVECTOR4 *v2) -> r0 = float bits of (V1*W2 - V2*W1) * v0.w * v2.w * v1.w
+FUNCTION(tv_direction):
+    vld1.32     {q0}, [r0]              @ q0 = *v0; lane pairs below are written hi,lo: d0 = y,x  d1 hi = w
+    vld1.32     {q2}, [r2]              @ q2 = *v2: d4 = y,x  d5 hi = w
+    vld1.32     {q1}, [r1]              @ q1 = *v1: d2 = y,x  d3 hi = w
+    vsub.f32    d6, d4, d0              @ d6 = V2,V1 (v2.xy - v0.xy)
+    vsub.f32    d7, d4, d2              @ d7 = W2,W1 (v2.xy - v1.xy)
+    vmul.f32    d1, d5                  @ d1 = v0.w * v2.w (hi lane; lo lane is not used)
+    vrev64.32   d7, d7                  @ swap lanes: d7 = W1,W2
+    vmul.f32    d6, d7                  @ d6 = V2*W1,V1*W2
+    vmul.f32    d1, d3                  @ d1 *= v1.w
+    vshr.u64    d7, d6, #32             @ move hi lane down: d7[0] = V2*W1, d7[1] = 0
+    vsub.f32    d6, d7                  @ d6[0] = V1*W2 - V2*W1
+    vshr.u64    d1, d1, #32             @ move hi lane down: d1[0] = v0.w * v2.w * v1.w
+    vmul.f32    d0, d1, d6              @ d0[0] = signed direction (same product as the C fDirection)
+    vmov.32     r0, d0[0]               @ return the raw float bits; caller reads them as int, so only the sign is meaningful (-0.0 reads as negative)
+    bx          lr
+
+
 @ vim:filetype=armasm:expandtab