X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=source%2Fgles2rice%2Fsrc%2FRenderBase.cpp;h=78c7f90cf43ec9873a47b706748af7aacd174c61;hb=5288f5429f38c99c73856e285d35e7d0c4c779d0;hp=429f51d3052909b39486940f644b2fb6dd8b6a37;hpb=61b9f2dfb3e20d2e2e7efda30cf459df5134d88f;p=mupen64plus-pandora.git diff --git a/source/gles2rice/src/RenderBase.cpp b/source/gles2rice/src/RenderBase.cpp index 429f51d..78c7f90 100644 --- a/source/gles2rice/src/RenderBase.cpp +++ b/source/gles2rice/src/RenderBase.cpp @@ -854,10 +854,90 @@ void ComputeLOD(bool openGL) bool bHalfTxtScale=false; extern uint32 lastSetTile; +#define noinline __attribute__((noinline)) + +static noinline void InitVertex_scale_hack_check(uint32 dwV) +{ + // Check for txt scale hack + if( gRDP.tiles[lastSetTile].dwSize == TXT_SIZE_32b || gRDP.tiles[lastSetTile].dwSize == TXT_SIZE_4b ) + { + int width = ((gRDP.tiles[lastSetTile].sh-gRDP.tiles[lastSetTile].sl+1)<<1); + int height = ((gRDP.tiles[lastSetTile].th-gRDP.tiles[lastSetTile].tl+1)<<1); + if( g_fVtxTxtCoords[dwV].x*gRSP.fTexScaleX == width || g_fVtxTxtCoords[dwV].y*gRSP.fTexScaleY == height ) + { + bHalfTxtScale=true; + } + } +} + +static noinline void InitVertex_notopengl_or_clipper_adjust(TLITVERTEX &v, uint32 dwV) +{ + v.x = g_vecProjected[dwV].x*gRSP.vtxXMul+gRSP.vtxXAdd; + v.y = g_vecProjected[dwV].y*gRSP.vtxYMul+gRSP.vtxYAdd; + v.z = (g_vecProjected[dwV].z + 1.0f) * 0.5f; // DirectX minZ=0, maxZ=1 + //v.z = g_vecProjected[dwV].z; // DirectX minZ=0, maxZ=1 + v.rhw = g_vecProjected[dwV].w; + VTX_DUMP(TRACE4(" Proj : x=%f, y=%f, z=%f, rhw=%f", v.x,v.y,v.z,v.rhw)); + + if( gRSP.bProcessSpecularColor ) + { + v.dcSpecular = CRender::g_pRender->PostProcessSpecularColor(); + if( gRSP.bFogEnabled ) + { + v.dcSpecular &= 0x00FFFFFF; + uint32 fogFct = 0xFF-(uint8)((g_fFogCoord[dwV]-gRSPfFogMin)*gRSPfFogDivider); + v.dcSpecular |= (fogFct<<24); + } + } + else if( gRSP.bFogEnabled ) + { + uint32 fogFct = 0xFF-(uint8)((g_fFogCoord[dwV]-gRSPfFogMin)*gRSPfFogDivider); + v.dcSpecular = (fogFct<<24); + } +} + +static noinline void InitVertex_texgen_correct(TLITVERTEX &v, uint32 dwV) +{ + // Correction for texGen result + float u0,u1,v0,v1; + RenderTexture &tex0 = g_textures[gRSP.curTile]; + u0 = g_fVtxTxtCoords[dwV].x * 32 * 1024 * gRSP.fTexScaleX / tex0.m_fTexWidth; + v0 = g_fVtxTxtCoords[dwV].y * 32 * 1024 * gRSP.fTexScaleY / tex0.m_fTexHeight; + u0 *= (gRDP.tiles[gRSP.curTile].fShiftScaleS); + v0 *= (gRDP.tiles[gRSP.curTile].fShiftScaleT); + + if( CRender::g_pRender->IsTexel1Enable() ) + { + RenderTexture &tex1 = g_textures[(gRSP.curTile+1)&7]; + u1 = g_fVtxTxtCoords[dwV].x * 32 * 1024 * gRSP.fTexScaleX / tex1.m_fTexWidth; + v1 = g_fVtxTxtCoords[dwV].y * 32 * 1024 * gRSP.fTexScaleY / tex1.m_fTexHeight; + u1 *= gRDP.tiles[(gRSP.curTile+1)&7].fShiftScaleS; + v1 *= gRDP.tiles[(gRSP.curTile+1)&7].fShiftScaleT; + CRender::g_pRender->SetVertexTextureUVCoord(v, u0, v0, u1, v1); + } + else + { + CRender::g_pRender->SetVertexTextureUVCoord(v, u0, v0); + } +} + +#ifndef __ARM_NEON__ +static void multiply_subtract2(float *d, const float *m1, const float *m2, const float *s) +{ + int i; + for (i = 0; i < 2; i++) + d[i] = m1[i] * m2[i] - s[i]; +} +#else +extern "C" void multiply_subtract2(float *d, const float *m1, const float *m2, const float *s); +#endif + void InitVertex(uint32 dwV, uint32 vtxIndex, bool bTexture, bool openGL) { VTX_DUMP(TRACE2("Init vertex (%d) to vtx buf[%d]:", dwV, vtxIndex)); - +#ifdef __linux__ + openGL = 1; // what else there is? +#endif TLITVERTEX &v = g_vtxBuffer[vtxIndex]; VTX_DUMP(TRACE4(" Trans: x=%f, y=%f, z=%f, w=%f", g_vtxTransformed[dwV].x,g_vtxTransformed[dwV].y,g_vtxTransformed[dwV].z,g_vtxTransformed[dwV].w)); if( openGL ) @@ -868,36 +948,15 @@ void InitVertex(uint32 dwV, uint32 vtxIndex, bool bTexture, bool openGL) g_vtxProjected5[vtxIndex][3] = g_vtxTransformed[dwV].w; g_vtxProjected5[vtxIndex][4] = g_vecProjected[dwV].z; - if( g_vtxTransformed[dwV].w < 0 ) + if( *(int *)&g_vtxTransformed[dwV].w < 0 ) g_vtxProjected5[vtxIndex][4] = 0; g_vtxIndex[vtxIndex] = vtxIndex; } - if( !openGL || options.bOGLVertexClipper == TRUE ) + if( __builtin_expect(!openGL || options.bOGLVertexClipper == TRUE, 0) ) { - v.x = g_vecProjected[dwV].x*gRSP.vtxXMul+gRSP.vtxXAdd; - v.y = g_vecProjected[dwV].y*gRSP.vtxYMul+gRSP.vtxYAdd; - v.z = (g_vecProjected[dwV].z + 1.0f) * 0.5f; // DirectX minZ=0, maxZ=1 - //v.z = g_vecProjected[dwV].z; // DirectX minZ=0, maxZ=1 - v.rhw = g_vecProjected[dwV].w; - VTX_DUMP(TRACE4(" Proj : x=%f, y=%f, z=%f, rhw=%f", v.x,v.y,v.z,v.rhw)); - - if( gRSP.bProcessSpecularColor ) - { - v.dcSpecular = CRender::g_pRender->PostProcessSpecularColor(); - if( gRSP.bFogEnabled ) - { - v.dcSpecular &= 0x00FFFFFF; - uint32 fogFct = 0xFF-(uint8)((g_fFogCoord[dwV]-gRSPfFogMin)*gRSPfFogDivider); - v.dcSpecular |= (fogFct<<24); - } - } - else if( gRSP.bFogEnabled ) - { - uint32 fogFct = 0xFF-(uint8)((g_fFogCoord[dwV]-gRSPfFogMin)*gRSPfFogDivider); - v.dcSpecular = (fogFct<<24); - } + InitVertex_notopengl_or_clipper_adjust(v, dwV); } VTX_DUMP(TRACE2(" (U,V): %f, %f", g_fVtxTxtCoords[dwV].x,g_fVtxTxtCoords[dwV].y)); @@ -933,74 +992,33 @@ void InitVertex(uint32 dwV, uint32 vtxIndex, bool bTexture, bool openGL) { // If the vert is already lit, then there is no normal (and hence we can't generate tex coord) // Only scale if not generated automatically - if (gRSP.bTextureGen && gRSP.bLightingEnable) + if ( __builtin_expect(gRSP.bTextureGen && gRSP.bLightingEnable, 0) ) { - // Correction for texGen result - float u0,u1,v0,v1; - RenderTexture &tex0 = g_textures[gRSP.curTile]; - u0 = g_fVtxTxtCoords[dwV].x * 32 * 1024 * gRSP.fTexScaleX / tex0.m_fTexWidth; - v0 = g_fVtxTxtCoords[dwV].y * 32 * 1024 * gRSP.fTexScaleY / tex0.m_fTexHeight; - u0 *= (gRDP.tiles[gRSP.curTile].fShiftScaleS); - v0 *= (gRDP.tiles[gRSP.curTile].fShiftScaleT); - - if( CRender::g_pRender->IsTexel1Enable() ) - { - RenderTexture &tex1 = g_textures[(gRSP.curTile+1)&7]; - u1 = g_fVtxTxtCoords[dwV].x * 32 * 1024 * gRSP.fTexScaleX / tex1.m_fTexWidth; - v1 = g_fVtxTxtCoords[dwV].y * 32 * 1024 * gRSP.fTexScaleY / tex1.m_fTexHeight; - u1 *= gRDP.tiles[(gRSP.curTile+1)&7].fShiftScaleS; - v1 *= gRDP.tiles[(gRSP.curTile+1)&7].fShiftScaleT; - CRender::g_pRender->SetVertexTextureUVCoord(v, u0, v0, u1, v1); - } - else - { - CRender::g_pRender->SetVertexTextureUVCoord(v, u0, v0); - } + InitVertex_texgen_correct(v, dwV); } else { - float tex0u = g_fVtxTxtCoords[dwV].x *gRSP.tex0scaleX - gRSP.tex0OffsetX ; - float tex0v = g_fVtxTxtCoords[dwV].y *gRSP.tex0scaleY - gRSP.tex0OffsetY ; + TexCord tex0; + multiply_subtract2(&tex0.u, &g_fVtxTxtCoords[dwV].x, &gRSP.tex0scaleX, &gRSP.tex0OffsetX); if( CRender::g_pRender->IsTexel1Enable() ) { - float tex1u = g_fVtxTxtCoords[dwV].x *gRSP.tex1scaleX - gRSP.tex1OffsetX ; - float tex1v = g_fVtxTxtCoords[dwV].y *gRSP.tex1scaleY - gRSP.tex1OffsetY ; + TexCord tex1; + multiply_subtract2(&tex1.u, &g_fVtxTxtCoords[dwV].x, &gRSP.tex1scaleX, &gRSP.tex1OffsetX); - CRender::g_pRender->SetVertexTextureUVCoord(v, tex0u, tex0v, tex1u, tex1v); - VTX_DUMP(TRACE2(" (tex0): %f, %f", tex0u,tex0v)); - VTX_DUMP(TRACE2(" (tex1): %f, %f", tex1u,tex1v)); + CRender::g_pRender->SetVertexTextureUVCoord(v, tex0, tex1); + VTX_DUMP(TRACE2(" (tex0): %f, %f", tex0.u,tex0.v)); + VTX_DUMP(TRACE2(" (tex1): %f, %f", tex1.u,tex1.v)); } else { - CRender::g_pRender->SetVertexTextureUVCoord(v, tex0u, tex0v); - VTX_DUMP(TRACE2(" (tex0): %f, %f", tex0u,tex0v)); + CRender::g_pRender->SetVertexTextureUVCoord(v, tex0); + VTX_DUMP(TRACE2(" (tex0): %f, %f", tex0.u,tex0.v)); } } - // Check for txt scale hack - if( !bHalfTxtScale && g_curRomInfo.bTextureScaleHack && - (gRDP.tiles[lastSetTile].dwSize == TXT_SIZE_32b || gRDP.tiles[lastSetTile].dwSize == TXT_SIZE_4b ) ) - { - int width = ((gRDP.tiles[lastSetTile].sh-gRDP.tiles[lastSetTile].sl+1)<<1); - int height = ((gRDP.tiles[lastSetTile].th-gRDP.tiles[lastSetTile].tl+1)<<1); - if( g_fVtxTxtCoords[dwV].x*gRSP.fTexScaleX == width || g_fVtxTxtCoords[dwV].y*gRSP.fTexScaleY == height ) - { - bHalfTxtScale=true; - } - } - } - - if( g_curRomInfo.bEnableTxtLOD && vtxIndex == 1 && gRDP.otherMode.text_lod ) - { - if( CRender::g_pRender->IsTexel1Enable() && CRender::g_pRender->m_pColorCombiner->m_pDecodedMux->isUsed(MUX_LODFRAC) ) - { - ComputeLOD(openGL); - } - else - { - gRDP.LODFrac = 0; - } + if( __builtin_expect(g_curRomInfo.bTextureScaleHack && !bHalfTxtScale, 0) ) + InitVertex_scale_hack_check(dwV); } VTX_DUMP(TRACE2(" DIF(%08X), SPE(%08X)", v.dcDiffuse, v.dcSpecular)); @@ -1542,96 +1560,7 @@ extern "C" void pv_neon(XVECTOR4 *g_vtxTransformed, XVECTOR4 *g_vecProjected, uint32 gRSPnumLights, float gRSPfFogMin, uint32 primitiveColor, uint32 primitiveColor_); -// debug -//#define DO_CMP -#ifdef DO_CMP -// note: don't forget -fno-associative-math -static XVECTOR4 n_transformed[2], n_projected[2]; -static uint32 n_color[2]; -static VECTOR2 n_vtxcoords[2]; -static float n_fogcoord[2]; -static uint32 n_clipflag2[2]; - -static int do_cmp_f(void *a, void *b, int c) -{ - int *ia = (int *)a, *ib = (int *)b; - for (int i = 0; i < c; i++) { - int di = abs(ia[i] - ib[i]); - if (di > 7) { - printf("di: %d\n", di); - return 1; - } - } - return 0; -} - -static int do_cmp_c(uint32 a, uint32 b) -{ - if (abs(((a >> 0) & 0xff) - ((b >> 0) & 0xff)) > 1) - return 1; - if (abs(((a >> 8) & 0xff) - ((b >> 8) & 0xff)) > 1) - return 1; - if (abs(((a >> 16) & 0xff) - ((b >> 16) & 0xff)) > 1) - return 1; - if (abs(((a >> 24) & 0xff) - ((b >> 24) & 0xff)) > 1) - return 1; - - return 0; -} - -static void do_cmp(int i, int s, int neon_state) -{ - static int ccnt; - int bad = 0; - - // if (memcmp(&n_transformed, &g_vtxTransformed[i], sizeof(XVECTOR4))) - if (do_cmp_f(&n_transformed[s], &g_vtxTransformed[i], 4)) { - printf("transformed:\n%13.8e %13.8e %13.8e %13.8e\n" - "%13.8e %13.8e %13.8e %13.8e\n", - n_transformed[s].x, n_transformed[s].y, - n_transformed[s].z, n_transformed[s].w, - g_vtxTransformed[i].x, g_vtxTransformed[i].y, - g_vtxTransformed[i].z, g_vtxTransformed[i].w); - bad = 1; - } - if (do_cmp_f(&n_projected[s], &g_vecProjected[i], 4)) { - printf("projected:\n%13.8e %13.8e %13.8e %13.8e |%08x\n" - "%13.8e %13.8e %13.8e %13.8e |%08x\n", - n_projected[s].x, n_projected[s].y, - n_projected[s].z, n_projected[s].w, - *(uint32 *)&n_projected[s].w, - g_vecProjected[i].x, g_vecProjected[i].y, - g_vecProjected[i].z, g_vecProjected[i].w, - *(uint32 *)&g_vecProjected[i].w); - bad = 1; - } - if (n_vtxcoords[s].x != g_fVtxTxtCoords[i].x - || n_vtxcoords[s].y != g_fVtxTxtCoords[i].y) - { - printf("vtxcoords:\n%13.8e %13.8e\n%13.8e %13.8e\n", - n_vtxcoords[s].x, n_vtxcoords[s].y, - g_fVtxTxtCoords[i].x, g_fVtxTxtCoords[i].y); - bad = 1; - } - if (n_clipflag2[s] != g_clipFlag2[i]) { - printf("clipflag2: %08x %08x\n", n_clipflag2[s], g_clipFlag2[i]); - bad = 1; - } - if (do_cmp_c(n_color[s], g_dwVtxDifColor[i])) { - printf("n_color: %08x %08x\n", n_color[s], g_dwVtxDifColor[i]); - bad = 1; - } - if (!(neon_state & PV_NEON_ENABLE_SHADE)) - printf("!ENABLE_SHADE!\n"); - if (bad) { - printf("%d s=%d, state %02x\n", ccnt, s, neon_state); - printf(".w %08x %08x\n", - *(uint32 *)&n_projected[s].w, *(uint32 *)&g_vecProjected[i].w); - exit(ccnt); - } - ccnt++; -} -#endif +extern "C" int tv_direction(const XVECTOR4 *v0, const XVECTOR4 *v1, const XVECTOR4 *v2); void ProcessVertexDataNEON(uint32 dwAddr, uint32 dwV0, uint32 dwNum) { @@ -1657,9 +1586,6 @@ void ProcessVertexDataNEON(uint32 dwAddr, uint32 dwV0, uint32 dwNum) neon_state |= PV_NEON_FOG_ALPHA; uint32 i; -#ifdef DO_CMP - uint32 s = 0; -#endif UpdateCombinedMatrix(); @@ -1686,18 +1612,6 @@ void ProcessVertexDataNEON(uint32 dwAddr, uint32 dwV0, uint32 dwNum) // SP_Timing(RSP_GBI0_Vtx); status.SPCycleCount += Timing_RSP_GBI0_Vtx * dwNum; -//#define DO_CC -#ifdef DO_CC - asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(i)); - i |= 5; // master enable, ccnt reset - i &= ~8; // ccnt divider 0 - asm volatile("mcr p15, 0, %0, c9, c12, 0" :: "r"(i)); - // enable cycle counter - asm volatile("mcr p15, 0, %0, c9, c12, 1" :: "r"(1<<31)); - unsigned int cc_start; - asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(cc_start)); -#endif - #if 1 i = dwV0; pv_neon(&g_vtxTransformed[i], &g_vecProjected[i], @@ -1711,18 +1625,6 @@ void ProcessVertexDataNEON(uint32 dwAddr, uint32 dwV0, uint32 dwNum) #else for (i = dwV0; i < dwV0 + dwNum; i++) { -#ifdef DO_CMP - if (!(s & 1)) - pv_neon(n_transformed, n_projected, - n_color, n_vtxcoords, - n_fogcoord, n_clipflag2, - 1, neon_state, &pVtxBase[i - dwV0], - gRSPlights, gRSP.fAmbientColors, - &gRSPworldProject, &gRSPmodelViewTop, - gRSPnumLights, gRSPfFogMin, - gRDP.primitiveColor, gRDP.primitiveColor); -#endif - const FiddledVtx & vert = pVtxBase[i - dwV0]; XVECTOR3 vtx_raw; // was g_vtxNonTransformed @@ -1804,20 +1706,6 @@ void ProcessVertexDataNEON(uint32 dwAddr, uint32 dwV0, uint32 dwNum) g_fVtxTxtCoords[i].x = (float)vert.tu; g_fVtxTxtCoords[i].y = (float)vert.tv; -#ifdef DO_CMP - do_cmp(i, s++ & 1, neon_state); -#endif - } -#endif -#ifdef DO_CC - static int total, total_c; - unsigned int cc; - asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(cc)); - total += cc - cc_start; - total_c += dwNum; - if (total_c > 20000) { - printf("%.u\n", total / total_c); - total = total_c = 0; } #endif } @@ -1843,6 +1731,18 @@ bool PrepareTriangle(uint32 dwV0, uint32 dwV1, uint32 dwV2) InitVertex(dwV1, gRSP.numVertices+1, textureFlag, openGL); InitVertex(dwV2, gRSP.numVertices+2, textureFlag, openGL); + if( __builtin_expect(gRSP.numVertices == 0 && g_curRomInfo.bEnableTxtLOD && gRDP.otherMode.text_lod, 0) ) + { + if( CRender::g_pRender->IsTexel1Enable() && CRender::g_pRender->m_pColorCombiner->m_pDecodedMux->isUsed(MUX_LODFRAC) ) + { + ComputeLOD(openGL); + } + else + { + gRDP.LODFrac = 0; + } + } + gRSP.numVertices += 3; status.dwNumTrisRendered++; } @@ -1882,6 +1782,7 @@ bool IsTriangleVisible(uint32 dwV0, uint32 dwV1, uint32 dwV2) // method doesnt' work well when the z value is outside of screenspace //if (v0.z < 1 && v1.z < 1 && v2.z < 1) { +#ifndef __ARM_NEON__ float V1 = v2.x - v0.x; float V2 = v2.y - v0.y; @@ -1891,6 +1792,10 @@ bool IsTriangleVisible(uint32 dwV0, uint32 dwV1, uint32 dwV2) float fDirection = (V1 * W2) - (V2 * W1); fDirection = fDirection * v1.w * v2.w * v0.w; //float fDirection = v0.x*v1.y-v1.x*v0.y+v1.x*v2.y-v2.x*v1.y+v2.x*v0.y-v0.x*v2.y; +#else + // really returns float, but we only need sign + int fDirection = tv_direction(&v0, &v1, &v2); +#endif if (fDirection < 0 && gRSP.bCullBack) {