X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=source%2Fgles2glide64%2Fpandora.diff;fp=source%2Fgles2glide64%2Fpandora.diff;h=b1a10fec9b094e2b0f7d42be95c057f6dc4af721;hb=98e75f2d18c02c233da543560f76282f04fc796c;hp=0000000000000000000000000000000000000000;hpb=0ced54f867d36e8b324155bef49e8abfebfc3237;p=mupen64plus-pandora.git
diff --git a/source/gles2glide64/pandora.diff b/source/gles2glide64/pandora.diff
new file mode 100644
index 0000000..b1a10fe
--- /dev/null
+++ b/source/gles2glide64/pandora.diff
@@ -0,0 +1,1500 @@
+diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/3dmath.cpp ./Glide64/3dmath.cpp
+--- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/3dmath.cpp 2013-09-06 22:05:28.000000000 +0200
++++ ./Glide64/3dmath.cpp 2013-09-14 09:41:13.000000000 +0200
+@@ -202,15 +202,109 @@
+ }
+ }
+ 
++#ifdef __ARM_NEON__
++void MultMatrix_neon( float m0[4][4], float m1[4][4], float dest[4][4])
++{
++    asm volatile (
++        "vld1.32 {d0, d1}, [%1]! \n\t" //q0 = m1
++        "vld1.32 {d2, d3}, [%1]! \n\t" //q1 = m1+4
++        "vld1.32 {d4, d5}, [%1]! \n\t" //q2 = m1+8
++        "vld1.32 {d6, d7}, [%1] \n\t" //q3 = m1+12
++        "vld1.32 {d16, d17}, [%0]! \n\t" //q8 = m0
++        "vld1.32 {d18, d19}, [%0]! \n\t" //q9 = m0+4
++        "vld1.32 {d20, d21}, [%0]! \n\t" //q10 = m0+8
++        "vld1.32 {d22, d23}, [%0] \n\t" //q11 = m0+12
++
++        "vmul.f32 q12, q8, d0[0] \n\t" //q12 = q8 * d0[0]
++        "vmul.f32 q13, q8, d2[0] \n\t" //q13 = q8 * d2[0]
++        "vmul.f32 q14, q8, d4[0] \n\t" //q14 = q8 * d4[0]
++        "vmul.f32 q15, q8, d6[0] \n\t" //q15 = q8 * d6[0]
++        "vmla.f32 q12, q9, d0[1] \n\t" //q12 += q9 * d0[1]
++        "vmla.f32 q13, q9, d2[1] \n\t" //q13 += q9 * d2[1]
++        "vmla.f32 q14, q9, d4[1] \n\t" //q14 += q9 * d4[1]
++        "vmla.f32 q15, q9, d6[1] \n\t" //q15 += q9 * d6[1]
++        "vmla.f32 q12, q10, d1[0] \n\t" //q12 += q10 * d1[0]
++        "vmla.f32 q13, q10, d3[0] \n\t" //q13 += q10 * d3[0]
++        "vmla.f32 q14, q10, d5[0] \n\t" //q14 += q10 * d5[0]
++        "vmla.f32 q15, q10, d7[0] \n\t" //q15 += q10 * d7[0]
++        "vmla.f32 q12, q11, d1[1] \n\t" //q12 += q11 * d1[1]
++        "vmla.f32 q13, q11, d3[1] \n\t" //q13 += q11 * d3[1]
++        "vmla.f32 q14, q11, d5[1] \n\t" //q14 += q11 * d5[1]
++        "vmla.f32 q15, q11, d7[1] \n\t" //q15 += q11 * d7[1]
++
++        "vst1.32 {d24, d25}, [%2]! \n\t" //d = q12
++        "vst1.32 {d26, d27}, [%2]! \n\t" //d+4 = q13
++        "vst1.32 {d28, d29}, [%2]! \n\t" //d+8 = q14
++        "vst1.32 {d30, d31}, [%2] \n\t" //d+12 = q15
++
++        :"+r"(m0), "+r"(m1), "+r"(dest):
++        : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
++        "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
++        "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31",
++        "memory"
++    );
++}
++
++void Normalize_neon(float v[3])
++{
++    asm volatile (
++        "vld1.32 {d4}, [%0]! \n\t" //d4={x,y}
++        "flds s10, [%0] \n\t" //d5[0] = z
++        "sub %0, %0, #8 \n\t" //rewind %0 to v (the post-increment load advanced it by 8)
++        "vmul.f32 d0, d4, d4 \n\t" //d0= d4*d4
++        "vpadd.f32 d0, d0, d0 \n\t" //d0 = d0[0] + d0[1]
++        "vmla.f32 d0, d5, d5 \n\t" //d0 = d0 + d5*d5 (only lane 0 is meaningful)
++
++        "vmov.f32 d1, d0 \n\t" //d1 = d0
++        "vrsqrte.f32 d0, d0 \n\t" //d0 = ~ 1.0 / sqrt(d0)
++        "vmul.f32 d2, d0, d1 \n\t" //d2 = d0 * d1
++        "vrsqrts.f32 d3, d2, d0 \n\t" //d3 = (3 - d0 * d2) / 2
++        "vmul.f32 d0, d0, d3 \n\t" //d0 = d0 * d3
++        "vmul.f32 d2, d0, d1 \n\t" //d2 = d0 * d1
++        "vrsqrts.f32 d3, d2, d0 \n\t" //d3 = (3 - d0 * d2) / 2
++        "vmul.f32 d0, d0, d3 \n\t" //d0 = d0 * d3
++
++        "vmul.f32 q2, q2, d0[0] \n\t" //q2 = q2 * d0[0]
++        "vst1.32 {d4}, [%0]! \n\t" //store {x,y}
++        "fsts s10, [%0] \n\t" //store z
++
++        :"+r"(v) :
++        : "d0", "d1", "d2", "d3", "d4", "d5", "memory"
++    );
++}
++
++float DotProduct_neon( float v0[3], float v1[3] )
++{
++    float dot;
++    asm volatile (
++        "vld1.32 {d8}, [%1]! \n\t" //d8={x0,y0}
++        "vld1.32 {d10}, [%2]! \n\t" //d10={x1,y1}
++        "flds s18, [%1, #0] \n\t" //d9[0]={z0}
++        "flds s22, [%2, #0] \n\t" //d11[0]={z1}
++        "vmul.f32 d12, d8, d10 \n\t" //d12 = d8*d10
++        "vpadd.f32 d12, d12, d12 \n\t" //d12 = d12[0] + d12[1]
++        "vmla.f32 d12, d9, d11 \n\t" //d12 = d12 + d9*d11 (only lane 0 is meaningful)
++        "fmrs %0, s24 \n\t" //dot = s24 (d12[0])
++        : "=r"(dot), "+r"(v0), "+r"(v1):
++        : "d8", "d9", "d10", "d11", "d12", "memory" //"memory": the asm reads *v0 and *v1, which are not listed as operands
++
++    );
++    return dot;
++}
++
++#endif
++
+ // 2008.03.29 H.Morii - added SSE 3DNOW! 3x3 1x3 matrix multiplication
+ // and 3DNOW! 4x4 4x4 matrix multiplication
+ // 2011-01-03 Balrog - removed because is in NASM format and not 64-bit compatible
+ // This will need fixing.
++#ifndef __ARM_NEON__
+ MULMATRIX MulMatrices = MulMatricesC;
+ TRANSFORMVECTOR TransformVector = TransformVectorC;
+ TRANSFORMVECTOR InverseTransformVector = InverseTransformVectorC;
+ DOTPRODUCT DotProduct = DotProductC;
+ NORMALIZEVECTOR NormalizeVector = NormalizeVectorC;
++#endif
+ 
+ void MulMatricesSSE(float m1[4][4],float m2[4][4],float r[4][4])
+ {
+@@ -361,6 +455,7 @@
+ 
+ void math_init()
+ {
++#ifndef __ARM_NEON__
+ #ifndef _DEBUG
+ int IsSSE = FALSE;
+ #if defined(__GNUC__) && !defined(NO_ASM) && !defined(NOSSE)
+@@ -429,4 +524,5 @@
+ }
+ 
+ #endif //_DEBUG
++#endif //__ARM_NEON__
+ }
+diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/3dmath.h ./Glide64/3dmath.h
+--- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/3dmath.h 2013-09-06 22:05:28.000000000 +0200
++++ ./Glide64/3dmath.h 2013-09-14 19:01:12.000000000 +0200
+@@ -42,7 +42,22 @@
+ void calc_sphere (VERTEX *v);
+ 
+ void math_init();
++#ifdef __ARM_NEON__
++float DotProductC(register float *v1, register float *v2);
++void NormalizeVectorC(float *v);
++void TransformVectorC(float *src, float *dst, float mat[4][4]);
++void InverseTransformVectorC (float *src, float *dst, float mat[4][4]);
++void MulMatricesC(float m1[4][4],float m2[4][4],float r[4][4]);
++void MultMatrix_neon( float m0[4][4], float m1[4][4], float dest[4][4]);
++void Normalize_neon(float v[3]);
++float DotProduct_neon( float v0[3], float v1[3] );
+ 
++#define MulMatrices MulMatricesC //MultMatrix_neon
++#define TransformVector TransformVectorC
++#define InverseTransformVector InverseTransformVectorC
++#define DotProduct DotProductC //DotProduct_neon
++#define NormalizeVector NormalizeVectorC //Normalize_neon
++#else
+ typedef void (*MULMATRIX)(float m1[4][4],float m2[4][4],float r[4][4]);
+ extern MULMATRIX MulMatrices;
+ typedef void (*TRANSFORMVECTOR)(float *src,float *dst,float mat[4][4]);
+@@ -52,3 +67,4 @@
+ extern DOTPRODUCT DotProduct;
+ typedef void (*NORMALIZEVECTOR)(float *v);
+ extern NORMALIZEVECTOR NormalizeVector;
++#endif
+diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/3dmathneon.cpp ./Glide64/3dmathneon.cpp
+--- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/3dmathneon.cpp 1970-01-01 01:00:00.000000000 +0100
++++ ./Glide64/3dmathneon.cpp 2013-09-13 23:05:47.000000000 +0200
+@@ -0,0 +1,133 @@
++#include "3dmath.h"
++
++static void MultMatrix_neon( float m0[4][4], float m1[4][4], float dest[4][4])
++{
++    asm volatile (
++        "vld1.32 {d0, d1}, [%1]! \n\t" //q0 = m1
++        "vld1.32 {d2, d3}, [%1]! \n\t" //q1 = m1+4
++        "vld1.32 {d4, d5}, [%1]! \n\t" //q2 = m1+8
++        "vld1.32 {d6, d7}, [%1] \n\t" //q3 = m1+12
++        "vld1.32 {d16, d17}, [%0]! \n\t" //q8 = m0
++        "vld1.32 {d18, d19}, [%0]! \n\t" //q9 = m0+4
++        "vld1.32 {d20, d21}, [%0]! \n\t" //q10 = m0+8
++        "vld1.32 {d22, d23}, [%0] \n\t" //q11 = m0+12
++
++        "vmul.f32 q12, q8, d0[0] \n\t" //q12 = q8 * d0[0]
++        "vmul.f32 q13, q8, d2[0] \n\t" //q13 = q8 * d2[0]
++        "vmul.f32 q14, q8, d4[0] \n\t" //q14 = q8 * d4[0]
++        "vmul.f32 q15, q8, d6[0] \n\t" //q15 = q8 * d6[0]
++        "vmla.f32 q12, q9, d0[1] \n\t" //q12 += q9 * d0[1]
++        "vmla.f32 q13, q9, d2[1] \n\t" //q13 += q9 * d2[1]
++        "vmla.f32 q14, q9, d4[1] \n\t" //q14 += q9 * d4[1]
++        "vmla.f32 q15, q9, d6[1] \n\t" //q15 += q9 * d6[1]
++        "vmla.f32 q12, q10, d1[0] \n\t" //q12 += q10 * d1[0]
++        "vmla.f32 q13, q10, d3[0] \n\t" //q13 += q10 * d3[0]
++        "vmla.f32 q14, q10, d5[0] \n\t" //q14 += q10 * d5[0]
++        "vmla.f32 q15, q10, d7[0] \n\t" //q15 += q10 * d7[0]
++        "vmla.f32 q12, q11, d1[1] \n\t" //q12 += q11 * d1[1]
++        "vmla.f32 q13, q11, d3[1] \n\t" //q13 += q11 * d3[1]
++        "vmla.f32 q14, q11, d5[1] \n\t" //q14 += q11 * d5[1]
++        "vmla.f32 q15, q11, d7[1] \n\t" //q15 += q11 * d7[1]
++
++        "vst1.32 {d24, d25}, [%2]! \n\t" //d = q12
++        "vst1.32 {d26, d27}, [%2]! \n\t" //d+4 = q13
++        "vst1.32 {d28, d29}, [%2]! \n\t" //d+8 = q14
++        "vst1.32 {d30, d31}, [%2] \n\t" //d+12 = q15
++
++        :"+r"(m0), "+r"(m1), "+r"(dest):
++        : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
++        "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
++        "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31",
++        "memory"
++    );
++}
++
++static void TransformVectorNormalize_neon(float vec[3], float mtx[4][4])
++{
++    asm volatile (
++        "vld1.32 {d0}, [%1] \n\t" //d0 = {vec[0], vec[1]}
++        "flds s2, [%1, #8] \n\t" //d1[0] = vec[2]
++        "vld1.32 {d18, d19}, [%0]! \n\t" //q9 = m
++        "vld1.32 {d20, d21}, [%0]! \n\t" //q10 = m+4
++        "vld1.32 {d22, d23}, [%0] \n\t" //q11 = m+8
++
++        "vmul.f32 q2, q9, d0[0] \n\t" //q2 = q9*d0[0]
++        "vmla.f32 q2, q10, d0[1] \n\t" //q2 += q10*d0[1]
++        "vmla.f32 q2, q11, d1[0] \n\t" //q2 += q11*d1[0]
++
++        "vmul.f32 d0, d4, d4 \n\t" //d0 = d4*d4
++        "vpadd.f32 d0, d0, d0 \n\t" //d0 = d0[0] + d0[1]
++        "vmla.f32 d0, d5, d5 \n\t" //d0 = d0 + d5*d5 (only lane 0 is meaningful)
++
++        "vmov.f32 d1, d0 \n\t" //d1 = d0
++        "vrsqrte.f32 d0, d0 \n\t" //d0 = ~ 1.0 / sqrt(d0)
++        "vmul.f32 d2, d0, d1 \n\t" //d2 = d0 * d1
++        "vrsqrts.f32 d3, d2, d0 \n\t" //d3 = (3 - d0 * d2) / 2
++        "vmul.f32 d0, d0, d3 \n\t" //d0 = d0 * d3
++        "vmul.f32 d2, d0, d1 \n\t" //d2 = d0 * d1
++        "vrsqrts.f32 d3, d2, d0 \n\t" //d3 = (3 - d0 * d2) / 2
++        "vmul.f32 d0, d0, d3 \n\t" //d0 = d0 * d3
++
++        "vmul.f32 q2, q2, d0[0] \n\t" //q2 = q2 * d0[0]
++
++        "vst1.32 {d4}, [%1] \n\t" //vec[0..1] = d4
++        "fsts s10, [%1, #8] \n\t" //vec[2] = d5[0]
++        : "+r"(mtx): "r"(vec)
++        : "d0","d1","d2","d3","d4","d5","d18","d19","d20","d21","d22", "d23", "memory" //d4/d5: q2 is written above
++    );
++}
++
++static void Normalize_neon(float v[3])
++{
++    asm volatile (
++        "vld1.32 {d4}, [%0]! \n\t" //d4={x,y}
++        "flds s10, [%0] \n\t" //d5[0] = z
++        "sub %0, %0, #8 \n\t" //rewind %0 to v (the post-increment load advanced it by 8)
++        "vmul.f32 d0, d4, d4 \n\t" //d0= d4*d4
++        "vpadd.f32 d0, d0, d0 \n\t" //d0 = d0[0] + d0[1]
++        "vmla.f32 d0, d5, d5 \n\t" //d0 = d0 + d5*d5 (only lane 0 is meaningful)
++
++        "vmov.f32 d1, d0 \n\t" //d1 = d0
++        "vrsqrte.f32 d0, d0 \n\t" //d0 = ~ 1.0 / sqrt(d0)
++        "vmul.f32 d2, d0, d1 \n\t" //d2 = d0 * d1
++        "vrsqrts.f32 d3, d2, d0 \n\t" //d3 = (3 - d0 * d2) / 2
++        "vmul.f32 d0, d0, d3 \n\t" //d0 = d0 * d3
++        "vmul.f32 d2, d0, d1 \n\t" //d2 = d0 * d1
++        "vrsqrts.f32 d3, d2, d0 \n\t" //d3 = (3 - d0 * d2) / 2
++        "vmul.f32 d0, d0, d3 \n\t" //d0 = d0 * d3
++
++        "vmul.f32 q2, q2, d0[0] \n\t" //q2 = q2 * d0[0]
++        "vst1.32 {d4}, [%0]! \n\t" //store {x,y}
++        "fsts s10, [%0] \n\t" //store z
++
++        :"+r"(v) :
++        : "d0", "d1", "d2", "d3", "d4", "d5", "memory"
++    );
++}
++
++static float DotProduct_neon( float v0[3], float v1[3] )
++{
++    float dot;
++    asm volatile (
++        "vld1.32 {d8}, [%1]! \n\t" //d8={x0,y0}
++        "vld1.32 {d10}, [%2]! \n\t" //d10={x1,y1}
++        "flds s18, [%1, #0] \n\t" //d9[0]={z0}
++        "flds s22, [%2, #0] \n\t" //d11[0]={z1}
++        "vmul.f32 d12, d8, d10 \n\t" //d12 = d8*d10
++        "vpadd.f32 d12, d12, d12 \n\t" //d12 = d12[0] + d12[1]
++        "vmla.f32 d12, d9, d11 \n\t" //d12 = d12 + d9*d11 (only lane 0 is meaningful)
++        "fmrs %0, s24 \n\t" //dot = s24 (d12[0])
++        : "=r"(dot), "+r"(v0), "+r"(v1):
++        : "d8", "d9", "d10", "d11", "d12", "memory" //"memory": the asm reads *v0 and *v1, which are not listed as operands
++
++    );
++    return dot;
++}
++
++void MathInitNeon()
++{
++ MulMatrices = MultMatrix_neon;
++ //TransformVectorNormalize = TransformVectorNormalize_neon;
++ NormalizeVector = Normalize_neon;
++ DotProduct = DotProduct_neon;
++}
+diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/Config.cpp ./Glide64/Config.cpp
+--- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/Config.cpp 2013-09-06 22:05:29.000000000 +0200
++++ ./Glide64/Config.cpp 2013-09-07 10:51:27.000000000 +0200
+@@ -89,7 +89,7 @@
+ { 640, 480 },
+ { 800, 600 },
+ { 960, 720 },
+- { 856, 480 },
++ { 800, 480 },
+ { 512, 256 },
+ { 1024, 768 },
+ { 1280, 1024 },
+diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/CRC.cpp ./Glide64/CRC.cpp
+--- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/CRC.cpp 2013-09-06 22:05:28.000000000 +0200
++++ ./Glide64/CRC.cpp 2013-09-08 13:12:00.000000000 +0200
+@@ -43,6 +43,7 @@
+ //
+ //****************************************************************
+ //*
++
+ #define CRC32_POLYNOMIAL 0x04C11DB7
+ 
+ unsigned int CRCTable[ 256 ];
+@@ -140,3 +141,4 @@
+ return Crc32;
+ }
+ //*/
++
+diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/FBtoScreen.cpp ./Glide64/FBtoScreen.cpp
+--- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/FBtoScreen.cpp 2013-09-06 22:05:29.000000000 +0200
++++ ./Glide64/FBtoScreen.cpp 2013-09-08 11:57:33.000000000 +0200
+@@ -165,12 +165,15 @@
+ for (wxUint32 w = 0; w < 256; w++)
+ {
+ col = *(src++);
+- r = (wxUint8)((col >> 24)&0xFF);
++ r = (wxUint8)((col >> (24+3))&0x1F);
++ g = (wxUint8)((col >> (16+2))&0x3F);
++ b = (wxUint8)((col >> (8+3))&0x1F); ++/* r = (wxUint8)((col >> 24)&0xFF); + r = (wxUint8)((float)r / 255.0f * 31.0f); + g = (wxUint8)((col >> 16)&0xFF); + g = (wxUint8)((float)g / 255.0f * 63.0f); + b = (wxUint8)((col >> 8)&0xFF); +- b = (wxUint8)((float)b / 255.0f * 31.0f); ++ b = (wxUint8)((float)b / 255.0f * 31.0f);*/ //*SEB* + *(dst++) = (r << 11) | (g << 5) | b; + } + src += (fb_info.width - 256); +@@ -261,12 +264,15 @@ + if (idx >= bound) + break; + c32 = src32[idx]; +- r = (wxUint8)((c32 >> 24)&0xFF); ++ r = (wxUint8)((c32 >> (24+3))&0x1F); ++ g = (wxUint8)((c32 >> (16+2))&0x3F); ++ b = (wxUint8)((c32 >> (8+3))&0x1F); ++/* r = (wxUint8)((c32 >> 24)&0xFF); + r = (wxUint8)((float)r / 255.0f * 31.0f); + g = (wxUint8)((c32 >> 16)&0xFF); + g = (wxUint8)((float)g / 255.0f * 63.0f); + b = (wxUint8)((c32 >> 8)&0xFF); +- b = (wxUint8)((float)b / 255.0f * 31.0f); ++ b = (wxUint8)((float)b / 255.0f * 31.0f);*/ //*SEB* + a = (c32&0xFF) ? 1 : 0; + *(dst++) = (a<<15) | (r << 10) | (g << 5) | b; + } +diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/Gfx_1.3.h ./Glide64/Gfx_1.3.h +--- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/Gfx_1.3.h 2013-09-06 22:05:29.000000000 +0200 ++++ ./Glide64/Gfx_1.3.h 2013-09-08 16:22:57.000000000 +0200 +@@ -106,6 +106,8 @@ + // ** TAKE OUT BEFORE RELEASE!!! ** + //#define LOGGING // log of spec functions called + //#define LOG_KEY // says "Key!!!" in the log when space bar is pressed ++//#define EXT_LOGGING ++//#define PERFORMANCE + + //#define LOG_UCODE + +@@ -120,15 +122,15 @@ + + #define FPS // fps counter able? (not enabled necessarily) + +-#define LOGNOTKEY // Log if not pressing: +-#define LOGKEY 0x11 // this key (CONTROL) ++//#define LOGNOTKEY // Log if not pressing: ++//#define LOGKEY 0x11 // this key (CONTROL) + + //#define LOG_COMMANDS // log the whole 64-bit command as (0x........, 0x........) 
+ + #define CATCH_EXCEPTIONS // catch exceptions so it doesn't freeze and will report + // "The gfx plugin has caused an exception" instead. + +-#define FLUSH // flush the file buffer. slower logging, but makes sure ++//#define FLUSH // flush the file buffer. slower logging, but makes sure + // the command is logged before continuing (in case of + // crash or exception, the log will not be cut short) + #ifndef _ENDUSER_RELEASE_ +@@ -144,7 +146,7 @@ + + + // Usually enabled +-#define LARGE_TEXTURE_HANDLING // allow large-textured objects to be split? ++//#define LARGE_TEXTURE_HANDLING // allow large-textured objects to be split? + + #ifdef ALTTAB_FIX + extern HHOOK hhkLowLevelKybd; +@@ -189,7 +191,6 @@ + + int CheckKeyPressed(int key, int mask); + +-//#define PERFORMANCE + #ifdef PERFORMANCE + extern int64 perf_cur; + extern int64 perf_next; +diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/Main.cpp ./Glide64/Main.cpp +--- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/Main.cpp 2013-09-06 22:05:29.000000000 +0200 ++++ ./Glide64/Main.cpp 2013-09-15 17:06:29.000000000 +0200 +@@ -170,7 +170,7 @@ + // 60=0x0, 70=0x1, 72=0x2, 75=0x3, 80=0x4, 90=0x5, 100=0x6, 85=0x7, 120=0x8, none=0xff + + #ifdef PAULSCODE +-#include "ae_bridge.h" ++//#include "ae_bridge.h" + #include "FrameSkipper.h" + FrameSkipper frameSkipper; + #endif +@@ -1768,12 +1768,13 @@ + EXPORT void CALL RomClosed (void) + { + VLOG ("RomClosed ()\n"); ++printf("RomClosed ()\n"); + + CLOSE_RDP_LOG (); + CLOSE_RDP_E_LOG (); + rdp.window_changed = TRUE; + romopen = FALSE; +- if (fullscreen && evoodoo) ++// if (fullscreen && evoodoo)//*SEB* + ReleaseGfx (); + } + +@@ -1973,9 +1974,6 @@ + wxUint32 update_screen_count = 0; + EXPORT void CALL UpdateScreen (void) + { +-#ifdef PAULSCODE +- frameSkipper.update(); +-#endif + #ifdef LOG_KEY + if (CheckKeyPressed(G64_VK_SPACE, 0x0001)) + { +@@ -2020,6 +2018,9 @@ + no_dlist = true; + ClearCache (); + UpdateScreen(); ++#ifdef PAULSCODE ++ 
frameSkipper.update(); ++#endif + return; + } + //*/ +@@ -2035,11 +2036,17 @@ + rdp.updatescreen = 1; + newSwapBuffers (); + } ++#ifdef PAULSCODE ++ frameSkipper.update(); ++#endif + return; + } + //*/ + if (settings.swapmode == 0) + newSwapBuffers (); ++#ifdef PAULSCODE ++ frameSkipper.update(); ++#endif + } + + static void DrawWholeFrameBufferToScreen() +diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/rdp.cpp ./Glide64/rdp.cpp +--- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/rdp.cpp 2013-09-06 22:05:29.000000000 +0200 ++++ ./Glide64/rdp.cpp 2013-09-13 22:23:52.000000000 +0200 +@@ -56,6 +56,10 @@ + extern FrameSkipper frameSkipper; + #endif + ++#ifdef PERFORMANCE ++#include "ticks.h" ++#endif ++ + /* + const int NumOfFormats = 3; + SCREEN_SHOT_FORMAT ScreenShotFormats[NumOfFormats] = { {wxT("BMP"), wxT("bmp"), wxBITMAP_TYPE_BMP}, {wxT("PNG"), wxT("png"), wxBITMAP_TYPE_PNG}, {wxT("JPEG"), wxT("jpeg"), wxBITMAP_TYPE_JPEG} }; +@@ -633,18 +637,21 @@ + + EXPORT void CALL ProcessDList(void) + { +- SoftLocker lock(mutexProcessDList); ++// SoftLocker lock(mutexProcessDList); + #ifdef PAULSCODE +- if (frameSkipper.willSkipNext() || !lock.IsOk()) //mutex is busy ++ if (frameSkipper.willSkipNext() /*|| !lock.IsOk()*/) //mutex is busy + #else +- if (!lock.IsOk()) //mutex is busy ++ if (/*!lock.IsOk()*/0) //mutex is busy + #endif + { ++// printf("Frameskip, reason=%s\n", (lock.IsOk())?"lock":"frameskip"); + if (!fullscreen) + drawNoFullscreenMessage(); + // Set an interrupt to allow the game to continue + *gfx.MI_INTR_REG |= 0x20; + gfx.CheckInterrupts(); ++ *gfx.MI_INTR_REG |= 0x01; ++ gfx.CheckInterrupts(); + return; + } + +@@ -717,7 +724,18 @@ + unimp.close(); + } + #endif +- ++/* ++#ifdef PAULSCODE ++ if (frameSkipper.willSkipNext()) ++ { ++ *gfx.MI_INTR_REG |= 0x20; ++ gfx.CheckInterrupts(); ++ *gfx.MI_INTR_REG |= 0x01; ++ gfx.CheckInterrupts(); ++ return; ++ } ++#endif ++*/ + //* Set states *// + if (settings.swapmode > 0) + SwapOK 
= TRUE; +@@ -818,7 +836,7 @@ + rdp.pc[rdp.pc_i] = (a+8) & BMASK; + + #ifdef PERFORMANCE +- perf_cur = wxDateTime::UNow(); ++ perf_cur = ticksGetTicks(); + #endif + // Process this instruction + gfx_instruction[settings.ucode][rdp.cmd0>>24] (); +@@ -837,9 +855,13 @@ + } + + #ifdef PERFORMANCE +- perf_next = wxDateTime::UNow(); +- sprintf (out_buf, "perf %08lx: %016I64d\n", a-8, (perf_next-perf_cur).Format(_T("%l")).mb_str()); ++ perf_next = ticksGetTicks(); ++ sprintf (out_buf, "perf %08x: %lli\n", a-8, (perf_next-perf_cur)); ++#ifdef RDP_LOGGING + rdp_log << out_buf; ++#else ++ printf(out_buf); ++#endif + #endif + + } while (!rdp.halt); +diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/Util.cpp ./Glide64/Util.cpp +--- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/Util.cpp 2013-09-06 22:05:29.000000000 +0200 ++++ ./Glide64/Util.cpp 2013-09-08 12:39:52.000000000 +0200 +@@ -289,29 +289,29 @@ + deltaZ = dzdx = 0; + if (linew == 0 && (fb_depth_render_enabled || (rdp.rm & 0xC00) == 0xC00)) + { +- double X0 = vtx[0]->sx / rdp.scale_x; +- double Y0 = vtx[0]->sy / rdp.scale_y; +- double X1 = vtx[1]->sx / rdp.scale_x; +- double Y1 = vtx[1]->sy / rdp.scale_y; +- double X2 = vtx[2]->sx / rdp.scale_x; +- double Y2 = vtx[2]->sy / rdp.scale_y; +- double diffy_02 = Y0 - Y2; +- double diffy_12 = Y1 - Y2; +- double diffx_02 = X0 - X2; +- double diffx_12 = X1 - X2; +- +- double denom = (diffx_02 * diffy_12 - diffx_12 * diffy_02); +- if(denom*denom > 0.0) +- { +- double diffz_02 = vtx[0]->sz - vtx[2]->sz; +- double diffz_12 = vtx[1]->sz - vtx[2]->sz; +- double fdzdx = (diffz_02 * diffy_12 - diffz_12 * diffy_02) / denom; ++ float X0 = vtx[0]->sx / rdp.scale_x; ++ float Y0 = vtx[0]->sy / rdp.scale_y; ++ float X1 = vtx[1]->sx / rdp.scale_x; ++ float Y1 = vtx[1]->sy / rdp.scale_y; ++ float X2 = vtx[2]->sx / rdp.scale_x; ++ float Y2 = vtx[2]->sy / rdp.scale_y; ++ float diffy_02 = Y0 - Y2; ++ float diffy_12 = Y1 - Y2; ++ float diffx_02 = X0 - X2; ++ float 
diffx_12 = X1 - X2; ++ ++ float denom = (diffx_02 * diffy_12 - diffx_12 * diffy_02); ++ if(denom*denom > 0.0f) ++ { ++ float diffz_02 = vtx[0]->sz - vtx[2]->sz; ++ float diffz_12 = vtx[1]->sz - vtx[2]->sz; ++ float fdzdx = (diffz_02 * diffy_12 - diffz_12 * diffy_02) / denom; + if ((rdp.rm & 0xC00) == 0xC00) { + // Calculate deltaZ per polygon for Decal z-mode +- double fdzdy = (diffz_02 * diffx_12 - diffz_12 * diffx_02) / denom; +- double fdz = fabs(fdzdx) + fabs(fdzdy); ++ float fdzdy = (diffz_02 * diffx_12 - diffz_12 * diffx_02) / denom; ++ float fdz = fabs(fdzdx) + fabs(fdzdy); + if ((settings.hacks & hack_Zelda) && (rdp.rm & 0x800)) +- fdz *= 4.0; // Decal mode in Zelda sometimes needs mutiplied deltaZ to work correct, e.g. roads ++ fdz *= 4.0f; // Decal mode in Zelda sometimes needs mutiplied deltaZ to work correct, e.g. roads + deltaZ = max(8, (int)fdz); + } + dzdx = (int)(fdzdx * 65536.0); +@@ -881,12 +881,12 @@ + //*/ + + typedef struct { +- double d; +- double x; +- double y; ++ float d; //*SEB* was doubles ++ float x; ++ float y; + } LineEuqationType; + +-static double EvaLine(LineEuqationType &li, double x, double y) ++static float EvaLine(LineEuqationType &li, float x, float y) //*SEB* all double before + { + return li.x*x+li.y*y+li.d; + } +@@ -906,7 +906,7 @@ + } + + +-__inline double interp3p(float a, float b, float c, double r1, double r2) ++__inline float interp3p(float a, float b, float c, float r1, float r2) //*SEB* r1 and r2 and function was double + { + return (a)+(((b)+((c)-(b))*(r2))-(a))*(r1); + } +@@ -915,34 +915,34 @@ + (a+(((b)+((c)-(b))*(r2))-(a))*(r1)) + */ + +-static void InterpolateColors3(VERTEX &v1, VERTEX &v2, VERTEX &v3, VERTEX &out) ++static void InterpolateColors3(VERTEX &v1, VERTEX &v2, VERTEX &v3, VERTEX &out) //*SEB* all double before + { + + LineEuqationType line; + Create1LineEq(line, v2, v3, v1); + +- double aDot = (out.x*line.x + out.y*line.y); +- double bDot = (v1.sx*line.x + v1.sy*line.y); ++ float aDot = (out.x*line.x + 
out.y*line.y); ++ float bDot = (v1.sx*line.x + v1.sy*line.y); + +- double scale1 = ( - line.d - aDot) / ( bDot - aDot ); ++ float scale1 = ( - line.d - aDot) / ( bDot - aDot ); + +- double tx = out.x + scale1 * (v1.sx - out.x); +- double ty = out.y + scale1 * (v1.sy - out.y); ++ float tx = out.x + scale1 * (v1.sx - out.x); ++ float ty = out.y + scale1 * (v1.sy - out.y); + +- double s1 = 101.0, s2 = 101.0; +- double den = tx - v1.sx; +- if (fabs(den) > 1.0) ++ float s1 = 101.0, s2 = 101.0; ++ float den = tx - v1.sx; ++ if (fabsf(den) > 1.0) + s1 = (out.x-v1.sx)/den; + if (s1 > 100.0f) + s1 = (out.y-v1.sy)/(ty-v1.sy); + + den = v3.sx - v2.sx; +- if (fabs(den) > 1.0) ++ if (fabsf(den) > 1.0) + s2 = (tx-v2.sx)/den; + if (s2 > 100.0f) + s2 =(ty-v2.sy)/(v3.sy-v2.sy); + +- double w = 1.0/interp3p(v1.oow,v2.oow,v3.oow,s1,s2); ++ float w = 1.0/interp3p(v1.oow,v2.oow,v3.oow,s1,s2); + + out.r = real_to_char(interp3p(v1.r*v1.oow,v2.r*v2.oow,v3.r*v3.oow,s1,s2)*w); + out.g = real_to_char(interp3p(v1.g*v1.oow,v2.g*v2.oow,v3.g*v3.oow,s1,s2)*w); +@@ -976,8 +976,8 @@ + */ + float deltaS, deltaT; + float deltaX, deltaY; +- double deltaTexels, deltaPixels, lodFactor = 0; +- double intptr; ++ float deltaTexels, deltaPixels, lodFactor = 0; //*SEB* double before ++ float intptr; //*SEB* double before + float s_scale = rdp.tiles[rdp.cur_tile].width / 255.0f; + float t_scale = rdp.tiles[rdp.cur_tile].height / 255.0f; + if (settings.lodmode == 1) +@@ -1019,7 +1019,7 @@ + float lod_fraction = 1.0f; + if (lod_tile < rdp.cur_tile + rdp.mipmap_level) + { +- lod_fraction = max((float)modf(lodFactor / pow(2.,lod_tile),&intptr), rdp.prim_lodmin / 255.0f); ++ lod_fraction = max((float)modff(lodFactor / powf(2.,lod_tile),&intptr), (float)rdp.prim_lodmin / 255.0f); + } + float detailmax; + if (cmb.dc0_detailmax < 0.5f) +diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/GlideHQ/TxDbg.cpp ./GlideHQ/TxDbg.cpp +--- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/GlideHQ/TxDbg.cpp 
2013-09-06 22:05:30.000000000 +0200 ++++ ./GlideHQ/TxDbg.cpp 2013-09-07 12:06:11.000000000 +0200 +@@ -28,6 +28,8 @@ + #include + #include + ++#define _GLIBCXX_HAVE_BROKEN_VSWPRINTF 1 ++ + TxDbg::TxDbg() + { + _level = DBG_LEVEL; +diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glitch64/combiner.cpp ./Glitch64/combiner.cpp +--- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glitch64/combiner.cpp 2013-09-06 22:05:30.000000000 +0200 ++++ ./Glitch64/combiner.cpp 2013-09-14 10:16:36.000000000 +0200 +@@ -29,6 +29,8 @@ + #include "glide.h" + #include "main.h" + ++#define GLchar char ++ + void vbo_draw(); + + static int fct[4], source0[4], operand0[4], source1[4], operand1[4], source2[4], operand2[4]; +@@ -117,10 +119,11 @@ + // using gl_FragCoord is terribly slow on ATI and varying variables don't work for some unknown + // reason, so we use the unused components of the texture2 coordinates + static const char* fragment_shader_dither = +-" float dithx = (gl_TexCoord[2].b + 1.0)*0.5*1000.0; \n" ++" \n" ++/*" float dithx = (gl_TexCoord[2].b + 1.0)*0.5*1000.0; \n" + " float dithy = (gl_TexCoord[2].a + 1.0)*0.5*1000.0; \n" + " if(texture2D(ditherTex, vec2((dithx-32.0*floor(dithx/32.0))/32.0, \n" +-" (dithy-32.0*floor(dithy/32.0))/32.0)).a > 0.5) discard; \n" ++" (dithy-32.0*floor(dithy/32.0))/32.0)).a > 0.5) discard; \n"*/ + ; + + static const char* fragment_shader_default = +@@ -165,11 +168,16 @@ + "} \n" + ; + ++static const char* fragment_shader_alt_end = ++" \n" ++"} \n" ++; ++ + static const char* vertex_shader = + SHADER_HEADER + "#define Z_MAX 65536.0 \n" + "attribute highp vec4 aVertex; \n" +-"attribute highp vec4 aColor; \n" ++"attribute mediump vec4 aColor; \n" //*SEB* highp -> lowp + "attribute highp vec4 aMultiTexCoord0; \n" + "attribute highp vec4 aMultiTexCoord1; \n" + "attribute float aFog; \n" +@@ -245,7 +253,7 @@ + + // creating a fake texture + glBindTexture(GL_TEXTURE_2D, default_texture); +- glTexImage2D(GL_TEXTURE_2D, 0, 3, 2, 2, 0, 
GL_RGBA, GL_UNSIGNED_BYTE, texture); ++ glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 2, 2, 0, GL_RGBA, GL_UNSIGNED_BYTE, texture); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + +@@ -286,7 +294,7 @@ + strlen(fragment_shader_end)+1); + strcpy(fragment_shader, fragment_shader_header); + strcat(fragment_shader, fragment_shader_default); +- strcat(fragment_shader, fragment_shader_end); ++ strcat(fragment_shader, fragment_shader_end); /*SEB*/ + glShaderSource(fragment_shader_object, 1, (const GLchar**)&fragment_shader, NULL); + free(fragment_shader); + +@@ -408,6 +416,7 @@ + int dither_enabled; + int blackandwhite0; + int blackandwhite1; ++ int alpha_test; //*SEB* + GLuint fragment_shader_object; + GLuint program_object; + int texture0_location; +@@ -489,6 +498,8 @@ + int i; + int chroma_color_location; + int log_length; ++ ++ int noalpha; + + need_to_compile = 0; + +@@ -502,6 +513,7 @@ + prog.texture0_combinera == texture0_combinera_key && + prog.texture1_combinera == texture1_combinera_key && + prog.fog_enabled == fog_enabled && ++ prog.alpha_test == alpha_test && //*SEB* + prog.chroma_enabled == chroma_enabled && + prog.dither_enabled == dither_enabled && + prog.blackandwhite0 == blackandwhite0 && +@@ -514,11 +526,13 @@ + } + } + +- if(shader_programs != NULL) +- shader_programs = (shader_program_key*)realloc(shader_programs, (number_of_programs+1)*sizeof(shader_program_key)); ++ if(shader_programs != NULL) { ++ if ((number_of_programs+1)>1024) ++ shader_programs = (shader_program_key*)realloc(shader_programs, (number_of_programs+1)*sizeof(shader_program_key)); ++ } + else +- shader_programs = (shader_program_key*)malloc(sizeof(shader_program_key)); +- //printf("number of shaders %d\n", number_of_programs); ++ shader_programs = (shader_program_key*)malloc(sizeof(shader_program_key)*1024); ++ //printf("number of shaders %d\n", number_of_programs); + + 
shader_programs[number_of_programs].color_combiner = color_combiner_key; + shader_programs[number_of_programs].alpha_combiner = alpha_combiner_key; +@@ -531,6 +545,7 @@ + shader_programs[number_of_programs].dither_enabled = dither_enabled; + shader_programs[number_of_programs].blackandwhite0 = blackandwhite0; + shader_programs[number_of_programs].blackandwhite1 = blackandwhite1; ++ shader_programs[number_of_programs].alpha_test = alpha_test; //*SEB* + + if(chroma_enabled) + { +@@ -557,7 +572,10 @@ + strcat(fragment_shader, fragment_shader_color_combiner); + strcat(fragment_shader, fragment_shader_alpha_combiner); + if(fog_enabled) strcat(fragment_shader, fragment_shader_fog); +- strcat(fragment_shader, fragment_shader_end); ++ if (alpha_test) ++ strcat(fragment_shader, fragment_shader_end); ++ else ++ strcat(fragment_shader, fragment_shader_alt_end); //*SEB* + if(chroma_enabled) strcat(fragment_shader, fragment_shader_chroma); + + shader_programs[number_of_programs].fragment_shader_object = glCreateShader(GL_FRAGMENT_SHADER); +@@ -1719,7 +1737,7 @@ + glActiveTexture(GL_TEXTURE2); + glEnable(GL_TEXTURE_2D); + glBindTexture(GL_TEXTURE_2D, 33*1024*1024); +- glTexImage2D(GL_TEXTURE_2D, 0, 4, 32, 32, 0, GL_RGBA, GL_UNSIGNED_BYTE, texture); ++ glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 32, 32, 0, GL_RGBA, GL_UNSIGNED_BYTE, texture); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glDisable(GL_TEXTURE_2D); +diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glitch64/geometry.cpp ./Glitch64/geometry.cpp +--- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glitch64/geometry.cpp 2013-09-06 22:05:30.000000000 +0200 ++++ ./Glitch64/geometry.cpp 2013-09-12 22:13:33.000000000 +0200 +@@ -34,7 +34,7 @@ + #define VERTEX_SIZE sizeof(VERTEX) //Size of vertex struct + + #ifdef PAULSCODE +-#include "ae_bridge.h" ++//#include "ae_bridge.h" + static float polygonOffsetFactor; + static 
float polygonOffsetUnits; + #endif +@@ -338,8 +338,11 @@ + void FindBestDepthBias() + { + #ifdef PAULSCODE +- int hardwareType = Android_JNI_GetHardwareType(); +- Android_JNI_GetPolygonOffset(hardwareType, 1, &polygonOffsetFactor, &polygonOffsetUnits); ++/* int hardwareType = Android_JNI_GetHardwareType(); ++ Android_JNI_GetPolygonOffset(hardwareType, 1, &polygonOffsetFactor, &polygonOffsetUnits);*/ ++// glPolygonOffset(0.2f, 0.2f); ++ polygonOffsetFactor=0.2f; ++ polygonOffsetUnits=0.2f; + #else + float f, bestz = 0.25f; + int x; +@@ -386,7 +389,11 @@ + if (level) + { + #ifdef PAULSCODE +- glPolygonOffset(polygonOffsetFactor, polygonOffsetUnits); ++// glPolygonOffset(polygonOffsetFactor, polygonOffsetUnits); ++ if(w_buffer_mode) ++ glPolygonOffset(1.0f, -(float)level*polygonOffsetUnits); ++ else ++ glPolygonOffset(0, (float)level*3.0f); + #else + if(w_buffer_mode) + glPolygonOffset(1.0f, -(float)level*zscale/255.0f); +@@ -408,13 +415,13 @@ + grDrawTriangle( const void *a, const void *b, const void *c ) + { + LOG("grDrawTriangle()\r\n\t"); +- ++/* + if(nvidia_viewport_hack && !render_to_texture) + { + glViewport(0, viewport_offset, viewport_width, viewport_height); + nvidia_viewport_hack = 0; + } +- ++*/ + reloadTexture(); + + if(need_to_compile) compile_shader(); +@@ -588,13 +595,13 @@ + { + void **pointers = (void**)pointers2; + LOG("grDrawVertexArray(%d,%d)\r\n", mode, Count); +- ++/* + if(nvidia_viewport_hack && !render_to_texture) + { + glViewport(0, viewport_offset, viewport_width, viewport_height); + nvidia_viewport_hack = 0; + } +- ++*/ + reloadTexture(); + + if(need_to_compile) compile_shader(); +@@ -612,13 +619,13 @@ + grDrawVertexArrayContiguous(FxU32 mode, FxU32 Count, void *pointers, FxU32 stride) + { + LOG("grDrawVertexArrayContiguous(%d,%d,%d)\r\n", mode, Count, stride); +- ++/* + if(nvidia_viewport_hack && !render_to_texture) + { + glViewport(0, viewport_offset, viewport_width, viewport_height); + nvidia_viewport_hack = 0; + } +- ++*/ + if(stride != 
156) + { + LOGINFO("Incompatible stride\n"); +diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glitch64/glitchmain.cpp ./Glitch64/glitchmain.cpp +--- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glitch64/glitchmain.cpp 2013-09-06 22:05:30.000000000 +0200 ++++ ./Glitch64/glitchmain.cpp 2013-09-15 17:13:49.000000000 +0200 +@@ -656,6 +656,9 @@ + #ifdef _WIN32 + glCompressedTexImage2DARB = (PFNGLCOMPRESSEDTEXIMAGE2DPROC)wglGetProcAddress("glCompressedTexImage2DARB"); + #endif ++/*SEB*/ ++ glPixelStorei(GL_UNPACK_ALIGNMENT, 1); ++ glPixelStorei(GL_PACK_ALIGNMENT, 1); + + + #ifdef _WIN32 +@@ -806,6 +809,7 @@ + fullscreen = 0; + } + #else ++ CoreVideo_Quit(); + //SDL_QuitSubSystem(SDL_INIT_VIDEO); + //sleep(2); + #endif +@@ -823,7 +827,7 @@ + int i; + static int fbs_init = 0; + +- //printf("grTextureBufferExt(%d, %d, %d, %d, %d, %d, %d)\r\n", tmu, startAddress, lodmin, lodmax, aspect, fmt, evenOdd); ++ //printf("grTextureBufferExt(%d, %d, %d, %d, %d, %d, %d)\r\n", tmu, startAddress, lodmin, lodmax, aspect, fmt, evenOdd); + LOG("grTextureBufferExt(%d, %d, %d, %d %d, %d, %d)\r\n", tmu, startAddress, lodmin, lodmax, aspect, fmt, evenOdd); + if (lodmin != lodmax) display_warning("grTextureBufferExt : loading more than one LOD"); + if (!use_fbo) { +@@ -907,8 +911,8 @@ + tmu_usage[rtmu].min = pBufferAddress; + if ((unsigned int) tmu_usage[rtmu].max < pBufferAddress+size) + tmu_usage[rtmu].max = pBufferAddress+size; +- // printf("tmu %d usage now %gMb - %gMb\n", +- // rtmu, tmu_usage[rtmu].min/1024.0f, tmu_usage[rtmu].max/1024.0f); ++ //printf("tmu %d usage now %gMb - %gMb\n", ++ // rtmu, tmu_usage[rtmu].min/1024.0f, tmu_usage[rtmu].max/1024.0f); + + + width = pBufferWidth; +@@ -927,14 +931,14 @@ + texbufs[i].fmt = fmt; + if (i == texbuf_i) + texbuf_i = (texbuf_i+1)&(NB_TEXBUFS-1); +- //printf("texbuf %x fmt %x\n", pBufferAddress, fmt); ++ //printf("texbuf %x fmt %x\n", pBufferAddress, fmt); + + // ZIGGY it speeds things up to not delete the buffers + // a 
better thing would be to delete them *sometimes* + // remove_tex(pBufferAddress+1, pBufferAddress + size); + add_tex(pBufferAddress); + +- //printf("viewport %dx%d\n", width, height); ++ //printf("viewport %dx%d\n", width, height); + if (height > screen_height) { + glViewport( 0, viewport_offset + screen_height - height, width, height); + } else +@@ -1009,7 +1013,6 @@ + } + } + } +- + remove_tex(pBufferAddress, pBufferAddress + width*height*2/*grTexFormatSize(fmt)*/); + //create new FBO + glGenFramebuffers( 1, &(fbs[nb_fb].fbid) ); +@@ -1768,6 +1771,7 @@ + GrLfbInfo_t *info ) + { + LOG("grLfbLock(%d,%d,%d,%d,%d)\r\n", type, buffer, writeMode, origin, pixelPipeline); ++//printf("grLfbLock(%d,%d,%d,%d,%d)\r\n", type, buffer, writeMode, origin, pixelPipeline); + if (type == GR_LFB_WRITE_ONLY) + { + display_warning("grLfbLock : write only"); +@@ -1792,12 +1796,32 @@ + if(buffer != GR_BUFFER_AUXBUFFER) + { + if (writeMode == GR_LFBWRITEMODE_888) { ++/*SEB*/ ++ buf = (unsigned char*)malloc(width*height*4); + //printf("LfbLock GR_LFBWRITEMODE_888\n"); + info->lfbPtr = frameBuffer; + info->strideInBytes = width*4; + info->writeMode = GR_LFBWRITEMODE_888; + info->origin = origin; + //glReadPixels(0, viewport_offset, width, height, GL_BGRA, GL_UNSIGNED_BYTE, frameBuffer); ++ glReadPixels(0, viewport_offset, width, height, GL_RGBA, GL_UNSIGNED_BYTE, buf); ++ ++/*SEB*/ ++ unsigned char *p=buf; ++ for (j=0; jorigin = origin; + glReadPixels(0, viewport_offset, width, height, GL_RGBA, GL_UNSIGNED_BYTE, buf); + ++/*SEB*/ ++ unsigned char *p=buf; + for (j=0; j> 3) << 11) | + ((buf[j*width*4+i*4+1] >> 2) << 5) | +- (buf[j*width*4+i*4+2] >> 3); ++ (buf[j*width*4+i*4+2] >> 3);*/ ++ *(f++) = ++ ((*(p) >> 3) << 11) | ++ ((*(p+1) >> 2) << 5) | ++ (*(p+2) >> 3); ++ p+=4; + } + } + free(buf); +@@ -1826,6 +1858,7 @@ + info->strideInBytes = width*2; + info->writeMode = GR_LFBWRITEMODE_ZA16; + info->origin = origin; ++ //*SEB* *TODO* check alignment + glReadPixels(0, viewport_offset, width, 
height, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, depthBuffer); + } + } +@@ -1855,6 +1888,7 @@ + unsigned short *frameBuffer = (unsigned short*)dst_data; + unsigned short *depthBuffer = (unsigned short*)dst_data; + LOG("grLfbReadRegion(%d,%d,%d,%d,%d,%d)\r\n", src_buffer, src_x, src_y, src_width, src_height, dst_stride); ++//printf("grLfbReadRegion(%d,%d,%d,%d,%d,%d)\r\n", src_buffer, src_x, src_y, src_width, src_height, dst_stride); + + switch(src_buffer) + { +@@ -1876,15 +1910,22 @@ + buf = (unsigned char*)malloc(src_width*src_height*4); + + glReadPixels(src_x, (viewport_offset)+height-src_y-src_height, src_width, src_height, GL_RGBA, GL_UNSIGNED_BYTE, buf); +- + for (j=0; j> 3) << 11) | + ((buf[(src_height-j-1)*src_width*4+i*4+1] >> 2) << 5) | +- (buf[(src_height-j-1)*src_width*4+i*4+2] >> 3); ++ (buf[(src_height-j-1)*src_width*4+i*4+2] >> 3);*/ ++ *(f++) = ++ ((*(p) >> 3) << 11) | ++ ((*(p+1) >> 2) << 5) | ++ (*(p+2) >> 3); ++ p+=4; + } + } + free(buf); +@@ -1892,15 +1933,19 @@ + else + { + buf = (unsigned char*)malloc(src_width*src_height*2); +- +- glReadPixels(src_x, (viewport_offset)+height-src_y-src_height, src_width, src_height, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, depthBuffer); ++//*SEB read in buf, not depthBuffer. ++ glReadPixels(src_x, (viewport_offset)+height-src_y-src_height, src_width, src_height, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, buf); + + for (j=0;j>10)&0x1F)<<3; + buf[j*tex_width*4+i*4+1]=((col>>5)&0x1F)<<3; + buf[j*tex_width*4+i*4+2]=((col>>0)&0x1F)<<3; +- buf[j*tex_width*4+i*4+3]= (col>>15) ? 0xFF : 0; ++ buf[j*tex_width*4+i*4+3]= (col>>15) ? 0xFF : 0;*/ ++ const unsigned int col = *(f++); ++ *(p)=((col>>10)&0x1F)<<3; ++ *(p+1)=((col>>5)&0x1F)<<3; ++ *(p+2)=((col>>0)&0x1F)<<3; ++ *(p+3)= (col>>15) ? 
0xFF : 0; ++ p+=4; + } ++ p+=comp_tex; ++ f+=comp_stride; + } + break; + case GR_LFBWRITEMODE_555: +@@ -1969,12 +2029,20 @@ + { + for (i=0; i>10)&0x1F)<<3; + buf[j*tex_width*4+i*4+1]=((col>>5)&0x1F)<<3; + buf[j*tex_width*4+i*4+2]=((col>>0)&0x1F)<<3; +- buf[j*tex_width*4+i*4+3]=0xFF; ++ buf[j*tex_width*4+i*4+3]=0xFF;*/ ++ const unsigned int col = *(f++); ++ *(p)=((col>>10)&0x1F)<<3; ++ *(p+1)=((col>>5)&0x1F)<<3; ++ *(p+2)=((col>>0)&0x1F)<<3; ++ *(p+3)=0xFF; ++ p+=4; + } ++ p+=comp_tex; ++ f+=comp_stride; + } + break; + case GR_LFBWRITEMODE_565: +@@ -1982,12 +2050,20 @@ + { + for (i=0; i>11)&0x1F)<<3; + buf[j*tex_width*4+i*4+1]=((col>>5)&0x3F)<<2; + buf[j*tex_width*4+i*4+2]=((col>>0)&0x1F)<<3; +- buf[j*tex_width*4+i*4+3]=0xFF; ++ buf[j*tex_width*4+i*4+3]=0xFF;*/ ++ const unsigned int col = *(f++); ++ *(p)=((col>>11)&0x1F)<<3; ++ *(p+1)=((col>>5)&0x3F)<<2; ++ *(p+2)=((col>>0)&0x1F)<<3; ++ *(p+3)=0xFF; ++ p+=4; + } ++ p+=comp_tex; ++ f+=comp_stride; + } + break; + default: +@@ -2006,7 +2082,7 @@ + #endif + + glBindTexture(GL_TEXTURE_2D, default_texture); +- glTexImage2D(GL_TEXTURE_2D, 0, 4, tex_width, tex_height, 0, GL_RGBA, GL_UNSIGNED_BYTE, buf); ++ glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex_width, tex_height, 0, GL_RGBA, GL_UNSIGNED_BYTE, buf); + free(buf); + + set_copy_shader(); +diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glitch64/textures.cpp ./Glitch64/textures.cpp +--- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glitch64/textures.cpp 2013-09-06 22:05:31.000000000 +0200 ++++ ./Glitch64/textures.cpp 2013-09-13 11:32:50.000000000 +0200 +@@ -26,6 +26,7 @@ + #include "glide.h" + #include "main.h" + #include ++#include + + /* Napalm extensions to GrTextureFormat_t */ + #define GR_TEXFMT_ARGB_CMP_FXT1 0x11 +@@ -107,7 +108,7 @@ + } + glDeleteTextures(n, t); + free(t); +- //printf("RMVTEX nbtex is now %d (%06x - %06x)\n", nbTex, idmin, idmax); ++//printf("RMVTEX nbtex is now %d (%06x - %06x)\n", nbTex, idmin, idmax); + } + + +@@ -115,7 +116,7 
@@ + { + texlist *aux = list; + texlist *aux2; +- //printf("ADDTEX nbtex is now %d (%06x)\n", nbTex, id); ++//printf("ADDTEX nbtex is now %d (%06x)\n", nbTex, id); + if (list == NULL || id < list->id) + { + nbTex++; +@@ -435,8 +436,11 @@ + factor = -1; + else + factor = grTexFormat2GLPackedFmt(info->format, &gltexfmt, &glpixfmt, &glpackfmt); +- ++//printf("grTexDownloadMipmap, id=%x, size=%ix%i, format=%x\n", startAddress+1, width, height, info->format); + if (factor < 0) { ++ gltexfmt = GL_RGBA; ++ glpixfmt = GL_RGBA; ++ glpackfmt = GL_UNSIGNED_BYTE; + + // VP fixed the texture conversions to be more accurate, also swapped + // the for i/j loops so that is is less likely to break the memory cache +@@ -444,7 +448,7 @@ + switch(info->format) + { + case GR_TEXFMT_ALPHA_8: +- for (i=0; idata)[m]; ++ ((unsigned short*)texture)[n] = texel|(texel<<8); ++ m++; ++ n++; ++ } ++ } ++ ++ glformat = gltexfmt = glpixfmt = GL_LUMINANCE_ALPHA; ++ glpackfmt = GL_UNSIGNED_BYTE; ++ factor = 1; + break; + case GR_TEXFMT_INTENSITY_8: // I8 support - H.Morii +- for (i=0; idata, width*height); ++ glformat = gltexfmt = glpixfmt = GL_LUMINANCE; ++ glpackfmt = GL_UNSIGNED_BYTE; + factor = 1; +- glformat = GL_ALPHA; + break; + case GR_TEXFMT_ALPHA_INTENSITY_44: + #if 1 +@@ -480,9 +503,9 @@ + { + for (j=0; jdata)[m]; ++/* unsigned int texel = (unsigned int)((unsigned char*)info->data)[m]; + #if 1 +- /* accurate conversion */ ++ // accurate conversion + unsigned int texel_hi = (texel & 0x000000F0) << 20; + unsigned int texel_low = texel & 0x0000000F; + texel_low |= (texel_low << 4); +@@ -493,61 +516,90 @@ + texel_hi |= ((texel_low << 16) | (texel_low << 8) | texel_low); + #endif + ((unsigned int*)texture)[n] = texel_hi; ++*/ ++ unsigned char texel = ((unsigned char*)info->data)[m]; ++ unsigned short texel_hi = (texel & 0x000000F0) << 4; ++ unsigned short texel_low = texel & 0x0000000F; ++ texel_low |= (texel_low << 4); ++ texel_hi |= ((texel_hi << 4) | (texel_low)); ++ ((unsigned 
short*)texture)[n] = texel_hi; + m++; + n++; + } + } + factor = 1; +- glformat = GL_LUMINANCE_ALPHA; ++ glformat = gltexfmt = glpixfmt = GL_LUMINANCE_ALPHA; ++ glpackfmt = GL_UNSIGNED_BYTE; ++// glformat = GL_LUMINANCE_ALPHA; + #endif + break; + case GR_TEXFMT_RGB_565: +- for (i=0; idata)[m]; ++ {*/ ++/* unsigned int texel = (unsigned int)((unsigned short*)info->data)[m]; + unsigned int B = texel & 0x0000F800; + unsigned int G = texel & 0x000007E0; + unsigned int R = texel & 0x0000001F; + #if 0 +- /* accurate conversion */ ++ // accurate conversion + ((unsigned int*)texture)[n] = 0xFF000000 | (R << 19) | ((R >> 2) << 16) | (G << 5) | ((G >> 9) << 8) | (B >> 8) | (B >> 13); + #else + ((unsigned int*)texture)[n] = 0xFF000000 | (R << 19) | (G << 5) | (B >> 8); + #endif ++*/ ++/* const unsigned short texel = ((unsigned short*)info->data)[m]; ++ const unsigned short B = (texel & 0xF800)>>11; ++ const unsigned short G = texel & 0x07E0; ++ const unsigned short R = (texel & 0x001F)<<11; ++ ((unsigned short*)texture)[n] = R|G|B; + m++; + n++; + } +- } ++ }*/ ++ memcpy(texture, info->data, width*height*2); + factor = 2; +- glformat = GL_RGB; ++// glformat = GL_RGB; ++ glformat = gltexfmt = glpixfmt = GL_RGB; ++ glpackfmt = GL_UNSIGNED_SHORT_5_6_5; + break; + case GR_TEXFMT_ARGB_1555: + for (i=0; idata)[m]; ++/* unsigned int texel = (unsigned int)((unsigned short*)info->data)[m]; + unsigned int A = texel & 0x00008000 ? 
0xFF000000 : 0; + unsigned int B = texel & 0x00007C00; + unsigned int G = texel & 0x000003E0; + unsigned int R = texel & 0x0000001F; + #if 0 +- /* accurate conversion */ ++ // accurate conversion + ((unsigned int*)texture)[n] = A | (R << 19) | ((R >> 2) << 16) | (G << 6) | ((G >> 8) << 8) | (B >> 7) | (B >> 12); + #else + ((unsigned int*)texture)[n] = A | (R << 19) | (G << 6) | (B >> 7); + #endif ++*/ ++ unsigned short texel = ((unsigned short*)info->data)[m]; ++ unsigned short A = (texel & 0x8000)>>15; ++ ((unsigned short*)texture)[n] = A|(texel&0x7fff)<<1; ++/* ++ unsigned short B = (texel & 0x7C00)>>9; ++ unsigned short G = texel & 0x03E0<<1; ++ unsigned short R = (texel & 0x001F)<<11; ++ ((unsigned short*)texture)[n] = A|R|G|B;*/ + m++; + n++; + } + } + factor = 2; +- glformat = GL_RGBA; ++// glformat = GL_RGBA; ++ glformat = gltexfmt = glpixfmt = GL_RGBA; ++ glpackfmt = GL_UNSIGNED_SHORT_5_5_5_1; + break; + case GR_TEXFMT_ALPHA_INTENSITY_88: +- for (i=0; idata, width*height*2); + factor = 2; + glformat = GL_LUMINANCE_ALPHA; ++ glformat = gltexfmt = glpixfmt = GL_LUMINANCE_ALPHA; ++ glpackfmt = GL_UNSIGNED_BYTE; + break; + case GR_TEXFMT_ARGB_4444: + +@@ -567,23 +622,29 @@ + { + for (j=0; jdata)[m]; ++/* unsigned int texel = (unsigned int)((unsigned short*)info->data)[m]; + unsigned int A = texel & 0x0000F000; + unsigned int B = texel & 0x00000F00; + unsigned int G = texel & 0x000000F0; + unsigned int R = texel & 0x0000000F; + #if 0 +- /* accurate conversion */ ++ // accurate conversion + ((unsigned int*)texture)[n] = (A << 16) | (A << 12) | (R << 20) | (R << 16) | (G << 8) | (G << 4) | (B >> 4) | (B >> 8); + #else + ((unsigned int*)texture)[n] = (A << 16) | (R << 20) | (G << 8) | (B >> 4); + #endif ++*/ ++ unsigned short texel = ((unsigned short*)info->data)[m]; ++ unsigned int A = (texel & 0xF000)>>12; ++ ((unsigned short*)texture)[n] = A|(texel&0x0fff)<<4; + m++; + n++; + } + } + factor = 2; + glformat = GL_RGBA; ++ glformat = gltexfmt = glpixfmt = GL_RGBA; 
++ glpackfmt = GL_UNSIGNED_SHORT_4_4_4_4; + break; + case GR_TEXFMT_ARGB_8888: + for (i=0; i 1.0f) + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, largest_supported_anisotropy); + +- glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, texture); ++//*SEB* glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, texture); ++//printf("new texture, id=%x, size=%ix%i, fmt=%x/%x\n", startAddress+1, width, height, gltexfmt, glpackfmt); ++ glTexImage2D(GL_TEXTURE_2D, 0, gltexfmt, width, height, 0, glpixfmt, glpackfmt, texture); + /* + switch(info->format) + {