From d6e5b275c693adc4cbd7287c0c2c7abaa9b0da1a Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 23 Jun 2014 01:56:38 +0300 Subject: [PATCH] rice: optimize some uv calculations --- source/gles2rice/src/Render.cpp | 48 +++++++++++++++++--------- source/gles2rice/src/Render.h | 2 ++ source/gles2rice/src/RenderBase.cpp | 29 +++++++++++----- source/gles2rice/src/RenderBase_neon.S | 12 +++++++ 4 files changed, 66 insertions(+), 25 deletions(-) diff --git a/source/gles2rice/src/Render.cpp b/source/gles2rice/src/Render.cpp index 068fb30..64868a4 100755 --- a/source/gles2rice/src/Render.cpp +++ b/source/gles2rice/src/Render.cpp @@ -1677,10 +1677,12 @@ void CRender::SaveTextureToFile(int tex, TextureChannel channel, bool bShow) #endif extern RenderTextureInfo gRenderTextureInfos[]; -void SetVertexTextureUVCoord(TexCord &dst, float s, float t, int tile, TxtrCacheEntry *pEntry) +void SetVertexTextureUVCoord(TexCord &dst, const TexCord &src, int tile, TxtrCacheEntry *pEntry) { RenderTexture &txtr = g_textures[tile]; RenderTextureInfo &info = gRenderTextureInfos[pEntry->txtrBufIdx-1]; + float s = src.u; + float t = src.v; /* s and t used to be parameters; they are now unpacked from src */ uint32 addrOffset = g_TI.dwAddr-info.CI_Info.dwAddr; uint32 extraTop = (addrOffset>>(info.CI_Info.dwSize-1)) /info.CI_Info.dwWidth; @@ -1700,21 +1702,30 @@ void SetVertexTextureUVCoord(TexCord &dst, float s, float t, int tile, TxtrCache dst.v = t; } -void CRender::SetVertexTextureUVCoord(TLITVERTEX &v, float fTex0S, float fTex0T) +void CRender::SetVertexTextureUVCoord(TLITVERTEX &v, const TexCord &fTex0) { RenderTexture &txtr = g_textures[0]; if( txtr.pTextureEntry && txtr.pTextureEntry->txtrBufIdx > 0 ) { - ::SetVertexTextureUVCoord(v.tcord[0], fTex0S, fTex0T, 0, txtr.pTextureEntry); + ::SetVertexTextureUVCoord(v.tcord[0], fTex0, 0, txtr.pTextureEntry); } else { - v.tcord[0].u = fTex0S; - v.tcord[0].v = fTex0T; + v.tcord[0] = fTex0; /* no texture entry with txtrBufIdx > 0: store the coordinates unchanged */ } } -void CRender::SetVertexTextureUVCoord(TLITVERTEX &v, float fTex0S, float fTex0T, float fTex1S, float fTex1T) + +void 
CRender::SetVertexTextureUVCoord(TLITVERTEX &v, float fTex0S, float fTex0T) +{ + TexCord t = { fTex0S, fTex0T }; + SetVertexTextureUVCoord(v, t); /* float overload: packs (fTex0S, fTex0T) into a TexCord and forwards to the TexCord overload */ +} + +void CRender::SetVertexTextureUVCoord(TLITVERTEX &v, const TexCord &fTex0_, const TexCord &fTex1_) { + TexCord fTex0 = fTex0_; + TexCord fTex1 = fTex1_; /* local copies, because the Zelda sun hack below may halve them */ + if( (options.enableHackForGames == HACK_FOR_ZELDA||options.enableHackForGames == HACK_FOR_ZELDA_MM) && m_Mux == 0x00262a60150c937fLL && gRSP.curTile == 0 ) { // Hack for Zelda Sun @@ -1724,36 +1735,41 @@ void CRender::SetVertexTextureUVCoord(TLITVERTEX &v, float fTex0S, float fTex0T, t1.dwFormat == TXT_FMT_I && t1.dwSize == TXT_SIZE_8b && t1.dwWidth == 64 && t0.dwHeight == t1.dwHeight ) { - fTex0S /= 2; - fTex0T /= 2; - fTex1S /= 2; - fTex1T /= 2; + fTex0.u /= 2; + fTex0.v /= 2; + fTex1.u /= 2; + fTex1.v /= 2; } } RenderTexture &txtr0 = g_textures[0]; if( txtr0.pTextureEntry && txtr0.pTextureEntry->txtrBufIdx > 0 ) { - ::SetVertexTextureUVCoord(v.tcord[0], fTex0S, fTex0T, 0, txtr0.pTextureEntry); + ::SetVertexTextureUVCoord(v.tcord[0], fTex0, 0, txtr0.pTextureEntry); } else { - v.tcord[0].u = fTex0S; - v.tcord[0].v = fTex0T; + v.tcord[0] = fTex0; } RenderTexture &txtr1 = g_textures[1]; if( txtr1.pTextureEntry && txtr1.pTextureEntry->txtrBufIdx > 0 ) { - ::SetVertexTextureUVCoord(v.tcord[1], fTex1S, fTex1T, 1, txtr1.pTextureEntry); + ::SetVertexTextureUVCoord(v.tcord[1], fTex1, 1, txtr1.pTextureEntry); } else { - v.tcord[1].u = fTex1S; - v.tcord[1].v = fTex1T; + v.tcord[1] = fTex1; } } +void CRender::SetVertexTextureUVCoord(TLITVERTEX &v, float fTex0S, float fTex0T, float fTex1S, float fTex1T) +{ + TexCord t0 = { fTex0S, fTex0T }; + TexCord t1 = { fTex1S, fTex1T }; + SetVertexTextureUVCoord(v, t0, t1); /* float overload: packs each coordinate pair into a TexCord and forwards */ +} + void CRender::SetClipRatio(uint32 type, uint32 w1) { bool modified = false; diff --git a/source/gles2rice/src/Render.h b/source/gles2rice/src/Render.h index 9ae2849..02e7b3b 100644 --- a/source/gles2rice/src/Render.h +++ b/source/gles2rice/src/Render.h @@ -168,6 
+168,8 @@ public: void SetVertexTextureUVCoord(TLITVERTEX &v, float fTex0S, float fTex0T, float fTex1S, float fTex1T); void SetVertexTextureUVCoord(TLITVERTEX &v, float fTex0S, float fTex0T); + void SetVertexTextureUVCoord(TLITVERTEX &v, const TexCord &fTex0, const TexCord &fTex1); + void SetVertexTextureUVCoord(TLITVERTEX &v, const TexCord &fTex0); /* TexCord overloads; Render.cpp implements the float overloads above by forwarding to these */ virtual COLOR PostProcessDiffuseColor(COLOR curDiffuseColor)=0; virtual COLOR PostProcessSpecularColor()=0; diff --git a/source/gles2rice/src/RenderBase.cpp b/source/gles2rice/src/RenderBase.cpp index 1598618..1ba7ec8 100644 --- a/source/gles2rice/src/RenderBase.cpp +++ b/source/gles2rice/src/RenderBase.cpp @@ -854,6 +854,17 @@ void ComputeLOD(bool openGL) bool bHalfTxtScale=false; extern uint32 lastSetTile; +#ifndef __ARM_NEON__ +static void multiply_subtract2(float *d, const float *m1, const float *m2, const float *s) /* d[i] = m1[i] * m2[i] - s[i] for i = 0 and 1 */ +{ + int i; + for (i = 0; i < 2; i++) + d[i] = m1[i] * m2[i] - s[i]; +} +#else +extern "C" void multiply_subtract2(float *d, const float *m1, const float *m2, const float *s); /* __ARM_NEON__ builds: defined in RenderBase_neon.S */ +#endif + void InitVertex(uint32 dwV, uint32 vtxIndex, bool bTexture, bool openGL) { VTX_DUMP(TRACE2("Init vertex (%d) to vtx buf[%d]:", dwV, vtxIndex)); @@ -959,22 +970,22 @@ void InitVertex(uint32 dwV, uint32 vtxIndex, bool bTexture, bool openGL) } else { - float tex0u = g_fVtxTxtCoords[dwV].x *gRSP.tex0scaleX - gRSP.tex0OffsetX ; - float tex0v = g_fVtxTxtCoords[dwV].y *gRSP.tex0scaleY - gRSP.tex0OffsetY ; + TexCord tex0; + multiply_subtract2(&tex0.u, &g_fVtxTxtCoords[dwV].x, &gRSP.tex0scaleX, &gRSP.tex0OffsetX); /* fills tex0.u and tex0.v in one call; every pointer is accessed as two consecutive floats, so this assumes u/v, x/y, tex0scaleX/tex0scaleY and tex0OffsetX/tex0OffsetY are adjacent in memory -- TODO confirm struct layouts */ if( CRender::g_pRender->IsTexel1Enable() ) { - float tex1u = g_fVtxTxtCoords[dwV].x *gRSP.tex1scaleX - gRSP.tex1OffsetX ; - float tex1v = g_fVtxTxtCoords[dwV].y *gRSP.tex1scaleY - gRSP.tex1OffsetY ; + TexCord tex1; + multiply_subtract2(&tex1.u, &g_fVtxTxtCoords[dwV].x, &gRSP.tex1scaleX, &gRSP.tex1OffsetX); - CRender::g_pRender->SetVertexTextureUVCoord(v, tex0u, tex0v, tex1u, tex1v); - VTX_DUMP(TRACE2(" (tex0): %f, %f", 
tex0u,tex0v)); - VTX_DUMP(TRACE2(" (tex1): %f, %f", tex1u,tex1v)); + CRender::g_pRender->SetVertexTextureUVCoord(v, tex0, tex1); + VTX_DUMP(TRACE2(" (tex0): %f, %f", tex0.u,tex0.v)); + VTX_DUMP(TRACE2(" (tex1): %f, %f", tex1.u,tex1.v)); } else { - CRender::g_pRender->SetVertexTextureUVCoord(v, tex0u, tex0v); - VTX_DUMP(TRACE2(" (tex0): %f, %f", tex0u,tex0v)); + CRender::g_pRender->SetVertexTextureUVCoord(v, tex0); + VTX_DUMP(TRACE2(" (tex0): %f, %f", tex0.u,tex0.v)); } } diff --git a/source/gles2rice/src/RenderBase_neon.S b/source/gles2rice/src/RenderBase_neon.S index 08df333..3e60c58 100644 --- a/source/gles2rice/src/RenderBase_neon.S +++ b/source/gles2rice/src/RenderBase_neon.S @@ -298,4 +298,16 @@ pv_neon_next: .size pv_neon, .-pv_neon +@ (float *d, const float *m1, const float *m2, const float *s) +FUNCTION(multiply_subtract2): + vld1.32 {d1}, [r1] + vld1.32 {d2}, [r2] + vmul.f32 d0, d1, d2 + vld1.32 {d3}, [r3] + vsub.f32 d0, d3 + vst1.32 {d0}, [r0] + bx lr + .size multiply_subtract2, .-multiply_subtract2 + + @ vim:filetype=armasm:expandtab -- 2.39.5