X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=source%2Fgles2glide64%2Fpandora.diff;fp=source%2Fgles2glide64%2Fpandora.diff;h=b1a10fec9b094e2b0f7d42be95c057f6dc4af721;hb=98e75f2d18c02c233da543560f76282f04fc796c;hp=0000000000000000000000000000000000000000;hpb=0ced54f867d36e8b324155bef49e8abfebfc3237;p=mupen64plus-pandora.git

diff --git a/source/gles2glide64/pandora.diff b/source/gles2glide64/pandora.diff
new file mode 100644
index 0000000..b1a10fe
--- /dev/null
+++ b/source/gles2glide64/pandora.diff
@@ -0,0 +1,1500 @@
+diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/3dmath.cpp ./Glide64/3dmath.cpp
+--- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/3dmath.cpp	2013-09-06 22:05:28.000000000 +0200
++++ ./Glide64/3dmath.cpp	2013-09-14 09:41:13.000000000 +0200
+@@ -202,15 +202,109 @@
+   }
+ }
+ 
++#ifdef __ARM_NEON__
++void MultMatrix_neon( float m0[4][4], float m1[4][4], float dest[4][4])	// dest = m1 x m0 (row-major 4x4): dest row i = sum over k of m1[i][k] * (m0 row k); every load precedes the first store
++{
++    asm volatile (
++	"vld1.32 		{d0, d1}, [%1]!			\n\t"	//q0 = m1 row 0
++	"vld1.32 		{d2, d3}, [%1]!	    	\n\t"	//q1 = m1 row 1 (m1+4)
++	"vld1.32 		{d4, d5}, [%1]!	    	\n\t"	//q2 = m1 row 2 (m1+8)
++	"vld1.32 		{d6, d7}, [%1]	    	\n\t"	//q3 = m1 row 3 (m1+12)
++	"vld1.32 		{d16, d17}, [%0]!		\n\t"	//q8 = m0 row 0
++	"vld1.32 		{d18, d19}, [%0]!   	\n\t"	//q9 = m0 row 1 (m0+4)
++	"vld1.32 		{d20, d21}, [%0]!   	\n\t"	//q10 = m0 row 2 (m0+8)
++	"vld1.32 		{d22, d23}, [%0]    	\n\t"	//q11 = m0 row 3 (m0+12)
++
++	"vmul.f32 		q12, q8, d0[0] 			\n\t"	//q12 = q8 * d0[0]
++	"vmul.f32 		q13, q8, d2[0] 		    \n\t"	//q13 = q8 * d2[0]
++	"vmul.f32 		q14, q8, d4[0] 		    \n\t"	//q14 = q8 * d4[0]
++	"vmul.f32 		q15, q8, d6[0]	 		\n\t"	//q15 = q8 * d6[0]
++	"vmla.f32 		q12, q9, d0[1] 			\n\t"	//q12 += q9 * d0[1]
++	"vmla.f32 		q13, q9, d2[1] 		    \n\t"	//q13 += q9 * d2[1]
++	"vmla.f32 		q14, q9, d4[1] 		    \n\t"	//q14 += q9 * d4[1]
++	"vmla.f32 		q15, q9, d6[1] 		    \n\t"	//q15 += q9 * d6[1]
++	"vmla.f32 		q12, q10, d1[0] 		\n\t"	//q12 += q10 * d1[0]
++	"vmla.f32 		q13, q10, d3[0] 		\n\t"	//q13 += q10 * d3[0]
++	"vmla.f32 		q14, q10, d5[0] 		\n\t"	//q14 += q10 * d5[0]
++	"vmla.f32 		q15, q10, d7[0] 		\n\t"	//q15 += q10 * d7[0]
++	"vmla.f32 		q12, q11, d1[1] 		\n\t"	//q12 += q11 * d1[1]
++	"vmla.f32 		q13, q11, d3[1] 		\n\t"	//q13 += q11 * d3[1]
++	"vmla.f32 		q14, q11, d5[1] 		\n\t"	//q14 += q11 * d5[1]
++	"vmla.f32 		q15, q11, d7[1]	 	    \n\t"	//q15 += q11 * d7[1]
++
++	"vst1.32 		{d24, d25}, [%2]! 		\n\t"	//dest row 0 = q12
++	"vst1.32 		{d26, d27}, [%2]! 	    \n\t"	//dest row 1 (d+4) = q13
++	"vst1.32 		{d28, d29}, [%2]! 	    \n\t"	//dest row 2 (d+8) = q14
++	"vst1.32 		{d30, d31}, [%2] 	    \n\t"	//dest row 3 (d+12) = q15
++
++	:"+r"(m0), "+r"(m1), "+r"(dest):	// read-write operands: the post-increment addressing above advances all three pointers
++    : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
++    "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
++    "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31",
++    "memory"
++	);
++}
++
++void Normalize_neon(float v[3])	// scales v in place by an approximate 1/sqrt(x*x+y*y+z*z): NEON estimate plus two Newton-Raphson steps; no guard for a zero-length vector
++{
++	asm volatile (
++	"vld1.32 		{d4}, [%0]!	    		\n\t"	//d4={x,y}
++	"flds    		s10, [%0]   	    	\n\t"	//d5[0] = z
++	"sub    		%0, %0, #8   	    	\n\t"	//rewind %0 to v (the post-increment load advanced it by 8 bytes)
++	"vmul.f32 		d0, d4, d4				\n\t"	//d0= d4*d4
++	"vpadd.f32 		d0, d0, d0				\n\t"	//d0 = d0[0] + d0[1] = x*x + y*y
++    "vmla.f32 		d0, d5, d5				\n\t"	//d0 = d0 + d5*d5 (only lane 0 is used below)
++
++	"vmov.f32 		d1, d0					\n\t"	//d1 = d0
++	"vrsqrte.f32 	d0, d0					\n\t"	//d0 = ~ 1.0 / sqrt(d0)
++	"vmul.f32 		d2, d0, d1				\n\t"	//d2 = d0 * d1
++	"vrsqrts.f32 	d3, d2, d0				\n\t"	//d3 = (3 - d0 * d2) / 2
++	"vmul.f32 		d0, d0, d3				\n\t"	//d0 = d0 * d3
++	"vmul.f32 		d2, d0, d1				\n\t"	//d2 = d0 * d1
++	"vrsqrts.f32 	d3, d2, d0				\n\t"	//d3 = (3 - d0 * d2) / 2
++	"vmul.f32 		d0, d0, d3				\n\t"	//d0 = d0 * d3
++
++	"vmul.f32 		q2, q2, d0[0]			\n\t"	//q2 = q2 * d0[0] (scale x, y, z)
++	"vst1.32 		{d4}, [%0]!  			\n\t"	//v[0], v[1] = d4
++	"fsts    		s10, [%0]     			\n\t"	//v[2] = d5[0]
++
++	:"+r"(v) :
++    : "d0", "d1", "d2", "d3", "d4", "d5", "memory"
++	);
++}
++
++float DotProduct_neon( float v0[3], float v1[3] )	// returns v0[0]*v1[0] + v0[1]*v1[1] + v0[2]*v1[2]
++{
++    float dot;
++	asm volatile (
++	"vld1.32 		{d8}, [%1]!			\n\t"	//d8={x0,y0}
++	"vld1.32 		{d10}, [%2]!		\n\t"	//d10={x1,y1}
++	"flds 			s18, [%1, #0]	    \n\t"	//d9[0]={z0}
++	"flds 			s22, [%2, #0]	    \n\t"	//d11[0]={z1}
++	"vmul.f32 		d12, d8, d10		\n\t"	//d12 = d8*d10
++	"vpadd.f32 		d12, d12, d12		\n\t"	//d12 = d12[0] + d12[1]
++	"vmla.f32 		d12, d9, d11		\n\t"	//d12 = d12 + d9*d11 (only lane 0 is used)
++    "fmrs	        %0, s24	    		\n\t"	//dot = s24 (= d12[0])
++	: "=r"(dot), "+r"(v0), "+r"(v1):
++    : "d8", "d9", "d10", "d11", "d12", "memory"	// "memory": the asm reads v0[]/v1[] through the pointer operands, so pending stores must be flushed first
++
++	);
++    return dot;
++}
++
++#endif
++
+ // 2008.03.29 H.Morii - added SSE 3DNOW! 3x3 1x3 matrix multiplication
+ //                      and 3DNOW! 4x4 4x4 matrix multiplication
+ // 2011-01-03 Balrog - removed because is in NASM format and not 64-bit compatible
+ // This will need fixing.
++#ifndef __ARM_NEON__
+ MULMATRIX MulMatrices = MulMatricesC;
+ TRANSFORMVECTOR TransformVector = TransformVectorC;
+ TRANSFORMVECTOR InverseTransformVector = InverseTransformVectorC;
+ DOTPRODUCT DotProduct = DotProductC;
+ NORMALIZEVECTOR NormalizeVector = NormalizeVectorC;
++#endif
+ 
+ void MulMatricesSSE(float m1[4][4],float m2[4][4],float r[4][4])
+ {
+@@ -361,6 +455,7 @@
+ 
+   void math_init()
+   {
++#ifndef __ARM_NEON__
+ #ifndef _DEBUG
+     int IsSSE = FALSE;
+ #if defined(__GNUC__) && !defined(NO_ASM) && !defined(NOSSE)
+@@ -429,4 +524,5 @@
+       }
+ 
+ #endif //_DEBUG
++#endif	//__ARM_NEON__
+     }
+diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/3dmath.h ./Glide64/3dmath.h
+--- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/3dmath.h	2013-09-06 22:05:28.000000000 +0200
++++ ./Glide64/3dmath.h	2013-09-14 19:01:12.000000000 +0200
+@@ -42,7 +42,22 @@
+ void calc_sphere (VERTEX *v);
+ 
+ void math_init();
++#ifdef __ARM_NEON__
++float DotProductC(register float *v1, register float *v2);
++void NormalizeVectorC(float *v);
++void TransformVectorC(float *src, float *dst, float mat[4][4]);
++void InverseTransformVectorC (float *src, float *dst, float mat[4][4]);
++void MulMatricesC(float m1[4][4],float m2[4][4],float r[4][4]);
++void MultMatrix_neon( float m0[4][4], float m1[4][4], float dest[4][4]);
++void Normalize_neon(float v[3]);
++float DotProduct_neon( float v0[3], float v1[3] );
+ 
++#define MulMatrices				MulMatricesC		//MultMatrix_neon
++#define TransformVector			TransformVectorC
++#define InverseTransformVector	InverseTransformVectorC
++#define DotProduct				DotProductC			//DotProduct_neon
++#define NormalizeVector			NormalizeVectorC	//Normalize_neon
++#else
+ typedef void (*MULMATRIX)(float m1[4][4],float m2[4][4],float r[4][4]); 
+ extern MULMATRIX MulMatrices;
+ typedef void (*TRANSFORMVECTOR)(float *src,float *dst,float mat[4][4]); 
+@@ -52,3 +67,4 @@
+ extern DOTPRODUCT DotProduct;
+ typedef void (*NORMALIZEVECTOR)(float *v);
+ extern NORMALIZEVECTOR NormalizeVector;
++#endif
+diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/3dmathneon.cpp ./Glide64/3dmathneon.cpp
+--- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/3dmathneon.cpp	1970-01-01 01:00:00.000000000 +0100
++++ ./Glide64/3dmathneon.cpp	2013-09-13 23:05:47.000000000 +0200
+@@ -0,0 +1,133 @@
++#include "3dmath.h"
++
++static void MultMatrix_neon( float m0[4][4], float m1[4][4], float dest[4][4])	// dest = m1 x m0 (row-major 4x4): dest row i = sum over k of m1[i][k] * (m0 row k); every load precedes the first store
++{
++    asm volatile (
++	"vld1.32 		{d0, d1}, [%1]!			\n\t"	//q0 = m1 row 0
++	"vld1.32 		{d2, d3}, [%1]!	    	\n\t"	//q1 = m1 row 1 (m1+4)
++	"vld1.32 		{d4, d5}, [%1]!	    	\n\t"	//q2 = m1 row 2 (m1+8)
++	"vld1.32 		{d6, d7}, [%1]	    	\n\t"	//q3 = m1 row 3 (m1+12)
++	"vld1.32 		{d16, d17}, [%0]!		\n\t"	//q8 = m0 row 0
++	"vld1.32 		{d18, d19}, [%0]!   	\n\t"	//q9 = m0 row 1 (m0+4)
++	"vld1.32 		{d20, d21}, [%0]!   	\n\t"	//q10 = m0 row 2 (m0+8)
++	"vld1.32 		{d22, d23}, [%0]    	\n\t"	//q11 = m0 row 3 (m0+12)
++
++	"vmul.f32 		q12, q8, d0[0] 			\n\t"	//q12 = q8 * d0[0]
++	"vmul.f32 		q13, q8, d2[0] 		    \n\t"	//q13 = q8 * d2[0]
++	"vmul.f32 		q14, q8, d4[0] 		    \n\t"	//q14 = q8 * d4[0]
++	"vmul.f32 		q15, q8, d6[0]	 		\n\t"	//q15 = q8 * d6[0]
++	"vmla.f32 		q12, q9, d0[1] 			\n\t"	//q12 += q9 * d0[1]
++	"vmla.f32 		q13, q9, d2[1] 		    \n\t"	//q13 += q9 * d2[1]
++	"vmla.f32 		q14, q9, d4[1] 		    \n\t"	//q14 += q9 * d4[1]
++	"vmla.f32 		q15, q9, d6[1] 		    \n\t"	//q15 += q9 * d6[1]
++	"vmla.f32 		q12, q10, d1[0] 		\n\t"	//q12 += q10 * d1[0]
++	"vmla.f32 		q13, q10, d3[0] 		\n\t"	//q13 += q10 * d3[0]
++	"vmla.f32 		q14, q10, d5[0] 		\n\t"	//q14 += q10 * d5[0]
++	"vmla.f32 		q15, q10, d7[0] 		\n\t"	//q15 += q10 * d7[0]
++	"vmla.f32 		q12, q11, d1[1] 		\n\t"	//q12 += q11 * d1[1]
++	"vmla.f32 		q13, q11, d3[1] 		\n\t"	//q13 += q11 * d3[1]
++	"vmla.f32 		q14, q11, d5[1] 		\n\t"	//q14 += q11 * d5[1]
++	"vmla.f32 		q15, q11, d7[1]	 	    \n\t"	//q15 += q11 * d7[1]
++
++	"vst1.32 		{d24, d25}, [%2]! 		\n\t"	//dest row 0 = q12
++	"vst1.32 		{d26, d27}, [%2]! 	    \n\t"	//dest row 1 (d+4) = q13
++	"vst1.32 		{d28, d29}, [%2]! 	    \n\t"	//dest row 2 (d+8) = q14
++	"vst1.32 		{d30, d31}, [%2] 	    \n\t"	//dest row 3 (d+12) = q15
++
++	:"+r"(m0), "+r"(m1), "+r"(dest):	// read-write operands: the post-increment addressing above advances all three pointers
++    : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
++    "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
++    "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31",
++    "memory"
++	);
++}
++
++static void TransformVectorNormalize_neon(float vec[3], float mtx[4][4])	// vec = normalize(vec[0]*mtx row 0 + vec[1]*mtx row 1 + vec[2]*mtx row 2), in place; mtx row 3 is not read
++{
++	asm volatile (
++	"vld1.32 		{d0}, [%1]  			\n\t"	//d0 = {x, y}
++	"flds    		s2, [%1, #8]  			\n\t"	//d1[0] = z
++	"vld1.32 		{d18, d19}, [%0]!		\n\t"	//q9 = mtx row 0
++	"vld1.32 		{d20, d21}, [%0]!	    \n\t"	//q10 = mtx row 1 (m+4)
++	"vld1.32 		{d22, d23}, [%0]	    \n\t"	//q11 = mtx row 2 (m+8)
++
++	"vmul.f32 		q2, q9, d0[0]			\n\t"	//q2 = q9*x
++	"vmla.f32 		q2, q10, d0[1]			\n\t"	//q2 += q10*y
++	"vmla.f32 		q2, q11, d1[0]			\n\t"	//q2 += q11*z
++
++    "vmul.f32 		d0, d4, d4				\n\t"	//d0 = d4*d4
++	"vpadd.f32 		d0, d0, d0				\n\t"	//d0 = d0[0] + d0[1]
++    "vmla.f32 		d0, d5, d5				\n\t"	//d0 = d0 + d5*d5 (only lane 0 is used below)
++
++	"vmov.f32 		d1, d0					\n\t"	//d1 = d0
++	"vrsqrte.f32 	d0, d0					\n\t"	//d0 = ~ 1.0 / sqrt(d0)
++	"vmul.f32 		d2, d0, d1				\n\t"	//d2 = d0 * d1
++	"vrsqrts.f32 	d3, d2, d0				\n\t"	//d3 = (3 - d0 * d2) / 2
++	"vmul.f32 		d0, d0, d3				\n\t"	//d0 = d0 * d3
++	"vmul.f32 		d2, d0, d1				\n\t"	//d2 = d0 * d1
++	"vrsqrts.f32 	d3, d2, d0				\n\t"	//d3 = (3 - d0 * d2) / 2
++	"vmul.f32 		d0, d0, d3				\n\t"	//d0 = d0 * d3
++
++	"vmul.f32 		q2, q2, d0[0]			\n\t"	//q2 = q2 * d0[0]
++
++	"vst1.32 		{d4}, [%1] 	    	    \n\t"	//vec[0], vec[1] = d4
++	"fsts   		s10, [%1, #8] 	    	\n\t"	//vec[2] = d5[0]
++	: "+r"(mtx): "r"(vec)
++    : "d0","d1","d2","d3","d4","d5","d18","d19","d20","d21","d22", "d23", "memory"	// d4/d5 (= q2) are written above and must be declared as clobbered
++	);
++}
++
++static void Normalize_neon(float v[3])	// scales v in place by an approximate 1/sqrt(x*x+y*y+z*z): NEON estimate plus two Newton-Raphson steps; no guard for a zero-length vector
++{
++	asm volatile (
++	"vld1.32 		{d4}, [%0]!	    		\n\t"	//d4={x,y}
++	"flds    		s10, [%0]   	    	\n\t"	//d5[0] = z
++	"sub    		%0, %0, #8   	    	\n\t"	//rewind %0 to v (the post-increment load advanced it by 8 bytes)
++	"vmul.f32 		d0, d4, d4				\n\t"	//d0= d4*d4
++	"vpadd.f32 		d0, d0, d0				\n\t"	//d0 = d0[0] + d0[1] = x*x + y*y
++    "vmla.f32 		d0, d5, d5				\n\t"	//d0 = d0 + d5*d5 (only lane 0 is used below)
++
++	"vmov.f32 		d1, d0					\n\t"	//d1 = d0
++	"vrsqrte.f32 	d0, d0					\n\t"	//d0 = ~ 1.0 / sqrt(d0)
++	"vmul.f32 		d2, d0, d1				\n\t"	//d2 = d0 * d1
++	"vrsqrts.f32 	d3, d2, d0				\n\t"	//d3 = (3 - d0 * d2) / 2
++	"vmul.f32 		d0, d0, d3				\n\t"	//d0 = d0 * d3
++	"vmul.f32 		d2, d0, d1				\n\t"	//d2 = d0 * d1
++	"vrsqrts.f32 	d3, d2, d0				\n\t"	//d3 = (3 - d0 * d2) / 2
++	"vmul.f32 		d0, d0, d3				\n\t"	//d0 = d0 * d3
++
++	"vmul.f32 		q2, q2, d0[0]			\n\t"	//q2 = q2 * d0[0] (scale x, y, z)
++	"vst1.32 		{d4}, [%0]!  			\n\t"	//v[0], v[1] = d4
++	"fsts    		s10, [%0]     			\n\t"	//v[2] = d5[0]
++
++	:"+r"(v) :
++    : "d0", "d1", "d2", "d3", "d4", "d5", "memory"
++	);
++}
++
++static float DotProduct_neon( float v0[3], float v1[3] )	// returns v0[0]*v1[0] + v0[1]*v1[1] + v0[2]*v1[2]
++{
++    float dot;
++	asm volatile (
++	"vld1.32 		{d8}, [%1]!			\n\t"	//d8={x0,y0}
++	"vld1.32 		{d10}, [%2]!		\n\t"	//d10={x1,y1}
++	"flds 			s18, [%1, #0]	    \n\t"	//d9[0]={z0}
++	"flds 			s22, [%2, #0]	    \n\t"	//d11[0]={z1}
++	"vmul.f32 		d12, d8, d10		\n\t"	//d12 = d8*d10
++	"vpadd.f32 		d12, d12, d12		\n\t"	//d12 = d12[0] + d12[1]
++	"vmla.f32 		d12, d9, d11		\n\t"	//d12 = d12 + d9*d11 (only lane 0 is used)
++    "fmrs	        %0, s24	    		\n\t"	//dot = s24 (= d12[0])
++	: "=r"(dot), "+r"(v0), "+r"(v1):
++    : "d8", "d9", "d10", "d11", "d12", "memory"	// "memory": the asm reads v0[]/v1[] through the pointer operands, so pending stores must be flushed first
++
++	);
++    return dot;
++}
++
++void MathInitNeon()	// assigns the NEON routines defined in this file to the MulMatrices / NormalizeVector / DotProduct names
++{
++    MulMatrices = MultMatrix_neon;	// NOTE(review): the 3dmath.h hunk of this patch #defines MulMatrices, NormalizeVector and DotProduct to the *C functions when __ARM_NEON__ is set, so these assignments would not compile in that configuration -- confirm this file is left out of the build
++    //TransformVectorNormalize = TransformVectorNormalize_neon;
++    NormalizeVector = Normalize_neon;
++    DotProduct = DotProduct_neon;
++}
+diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/Config.cpp ./Glide64/Config.cpp
+--- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/Config.cpp	2013-09-06 22:05:29.000000000 +0200
++++ ./Glide64/Config.cpp	2013-09-07 10:51:27.000000000 +0200
+@@ -89,7 +89,7 @@
+   { 640, 480 },
+   { 800, 600 },
+   { 960, 720 },
+-  { 856, 480 },
++  { 800, 480 },
+   { 512, 256 },
+   { 1024, 768 },
+   { 1280, 1024 },
+diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/CRC.cpp ./Glide64/CRC.cpp
+--- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/CRC.cpp	2013-09-06 22:05:28.000000000 +0200
++++ ./Glide64/CRC.cpp	2013-09-08 13:12:00.000000000 +0200
+@@ -43,6 +43,7 @@
+ //
+ //****************************************************************
+ //*
++
+ #define CRC32_POLYNOMIAL     0x04C11DB7
+ 
+ unsigned int CRCTable[ 256 ];
+@@ -140,3 +141,4 @@
+    return Crc32;
+ }
+ //*/
++
+diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/FBtoScreen.cpp ./Glide64/FBtoScreen.cpp
+--- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/FBtoScreen.cpp	2013-09-06 22:05:29.000000000 +0200
++++ ./Glide64/FBtoScreen.cpp	2013-09-08 11:57:33.000000000 +0200
+@@ -165,12 +165,15 @@
+     for (wxUint32 w = 0; w < 256; w++)
+     {
+       col = *(src++);
+-      r = (wxUint8)((col >> 24)&0xFF);
++      r = (wxUint8)((col >> (24+3))&0x1F);
++      g = (wxUint8)((col >> (16+2))&0x3F);
++      b = (wxUint8)((col >>  (8+3))&0x1F);
++/*      r = (wxUint8)((col >> 24)&0xFF);
+       r = (wxUint8)((float)r / 255.0f * 31.0f);
+       g = (wxUint8)((col >> 16)&0xFF);
+       g = (wxUint8)((float)g / 255.0f * 63.0f);
+       b = (wxUint8)((col >>  8)&0xFF);
+-      b = (wxUint8)((float)b / 255.0f * 31.0f);
++      b = (wxUint8)((float)b / 255.0f * 31.0f);*/	//*SEB*
+       *(dst++) = (r << 11) | (g << 5) | b;
+     }
+     src += (fb_info.width - 256);
+@@ -261,12 +264,15 @@
+             if (idx >= bound)
+               break;
+             c32 = src32[idx];
+-            r = (wxUint8)((c32 >> 24)&0xFF);
++            r = (wxUint8)((c32 >> (24+3))&0x1F);
++            g = (wxUint8)((c32 >> (16+2))&0x3F);
++            b = (wxUint8)((c32 >>  (8+3))&0x1F);
++/*            r = (wxUint8)((c32 >> 24)&0xFF);
+             r = (wxUint8)((float)r / 255.0f * 31.0f);
+             g = (wxUint8)((c32 >> 16)&0xFF);
+             g = (wxUint8)((float)g / 255.0f * 63.0f);
+             b = (wxUint8)((c32 >>  8)&0xFF);
+-            b = (wxUint8)((float)b / 255.0f * 31.0f);
++            b = (wxUint8)((float)b / 255.0f * 31.0f);*/	//*SEB*
+             a = (c32&0xFF) ? 1 : 0;
+             *(dst++) = (a<<15) | (r << 10) | (g << 5) | b;
+           }
+diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/Gfx_1.3.h ./Glide64/Gfx_1.3.h
+--- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/Gfx_1.3.h	2013-09-06 22:05:29.000000000 +0200
++++ ./Glide64/Gfx_1.3.h	2013-09-08 16:22:57.000000000 +0200
+@@ -106,6 +106,8 @@
+ // ** TAKE OUT BEFORE RELEASE!!! **
+ //#define LOGGING			// log of spec functions called
+ //#define LOG_KEY			// says "Key!!!" in the log when space bar is pressed
++//#define EXT_LOGGING
++//#define PERFORMANCE
+ 
+ //#define LOG_UCODE
+ 
+@@ -120,15 +122,15 @@
+ 
+ #define FPS					// fps counter able? (not enabled necessarily)
+ 
+-#define LOGNOTKEY			 // Log if not pressing:
+-#define LOGKEY		0x11 // this key (CONTROL)
++//#define LOGNOTKEY			 // Log if not pressing:
++//#define LOGKEY		0x11 // this key (CONTROL)
+ 
+ //#define LOG_COMMANDS		// log the whole 64-bit command as (0x........, 0x........)
+ 
+ #define CATCH_EXCEPTIONS	// catch exceptions so it doesn't freeze and will report
+ 							// "The gfx plugin has caused an exception" instead.
+ 
+-#define FLUSH				// flush the file buffer. slower logging, but makes sure
++//#define FLUSH				// flush the file buffer. slower logging, but makes sure
+ 							//  the command is logged before continuing (in case of
+ 							//  crash or exception, the log will not be cut short)
+ #ifndef _ENDUSER_RELEASE_
+@@ -144,7 +146,7 @@
+ 
+ 
+ // Usually enabled
+-#define LARGE_TEXTURE_HANDLING	// allow large-textured objects to be split?
++//#define LARGE_TEXTURE_HANDLING	// allow large-textured objects to be split?
+ 
+ #ifdef ALTTAB_FIX
+ extern HHOOK hhkLowLevelKybd;
+@@ -189,7 +191,6 @@
+ 
+ int CheckKeyPressed(int key, int mask);
+ 
+-//#define PERFORMANCE
+ #ifdef PERFORMANCE
+ extern int64 perf_cur;
+ extern int64 perf_next;
+diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/Main.cpp ./Glide64/Main.cpp
+--- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/Main.cpp	2013-09-06 22:05:29.000000000 +0200
++++ ./Glide64/Main.cpp	2013-09-15 17:06:29.000000000 +0200
+@@ -170,7 +170,7 @@
+ // 60=0x0, 70=0x1, 72=0x2, 75=0x3, 80=0x4, 90=0x5, 100=0x6, 85=0x7, 120=0x8, none=0xff
+ 
+ #ifdef PAULSCODE
+-#include "ae_bridge.h"
++//#include "ae_bridge.h"
+ #include "FrameSkipper.h"
+ FrameSkipper frameSkipper;
+ #endif
+@@ -1768,12 +1768,13 @@
+ EXPORT void CALL RomClosed (void)
+ {
+   VLOG ("RomClosed ()\n");
++printf("RomClosed ()\n");
+ 
+   CLOSE_RDP_LOG ();
+   CLOSE_RDP_E_LOG ();
+   rdp.window_changed = TRUE;
+   romopen = FALSE;
+-  if (fullscreen && evoodoo)
++//  if (fullscreen && evoodoo)//*SEB*
+     ReleaseGfx ();
+ }
+ 
+@@ -1973,9 +1974,6 @@
+ wxUint32 update_screen_count = 0;
+ EXPORT void CALL UpdateScreen (void)
+ {
+-#ifdef PAULSCODE
+-  frameSkipper.update();
+-#endif
+ #ifdef LOG_KEY
+   if (CheckKeyPressed(G64_VK_SPACE, 0x0001))
+   {
+@@ -2020,6 +2018,9 @@
+     no_dlist = true;
+     ClearCache ();
+     UpdateScreen();
++#ifdef PAULSCODE
++  frameSkipper.update();
++#endif
+     return;
+   }
+   //*/
+@@ -2035,11 +2036,17 @@
+       rdp.updatescreen = 1;
+       newSwapBuffers ();
+     }
++#ifdef PAULSCODE
++  frameSkipper.update();
++#endif
+     return;
+   }
+   //*/
+   if (settings.swapmode == 0)
+     newSwapBuffers ();
++#ifdef PAULSCODE
++  frameSkipper.update();
++#endif
+ }
+ 
+ static void DrawWholeFrameBufferToScreen()
+diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/rdp.cpp ./Glide64/rdp.cpp
+--- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/rdp.cpp	2013-09-06 22:05:29.000000000 +0200
++++ ./Glide64/rdp.cpp	2013-09-13 22:23:52.000000000 +0200
+@@ -56,6 +56,10 @@
+ extern FrameSkipper frameSkipper;
+ #endif
+ 
++#ifdef PERFORMANCE
++#include "ticks.h"
++#endif
++
+ /*
+ const int NumOfFormats = 3;
+ SCREEN_SHOT_FORMAT ScreenShotFormats[NumOfFormats] = { {wxT("BMP"), wxT("bmp"), wxBITMAP_TYPE_BMP}, {wxT("PNG"), wxT("png"), wxBITMAP_TYPE_PNG}, {wxT("JPEG"), wxT("jpeg"), wxBITMAP_TYPE_JPEG} };
+@@ -633,18 +637,21 @@
+ 
+ EXPORT void CALL ProcessDList(void)
+ {
+-  SoftLocker lock(mutexProcessDList);
++//  SoftLocker lock(mutexProcessDList);
+ #ifdef PAULSCODE
+-  if (frameSkipper.willSkipNext() || !lock.IsOk()) //mutex is busy
++  if (frameSkipper.willSkipNext() /*|| !lock.IsOk()*/) //mutex is busy
+ #else
+-  if (!lock.IsOk()) //mutex is busy
++  if (/*!lock.IsOk()*/0) //mutex is busy
+ #endif
+   {
++// printf("Frameskip, reason=%s\n", (lock.IsOk())?"lock":"frameskip");
+     if (!fullscreen)
+       drawNoFullscreenMessage();
+     // Set an interrupt to allow the game to continue
+     *gfx.MI_INTR_REG |= 0x20;
+     gfx.CheckInterrupts();
++	*gfx.MI_INTR_REG |= 0x01;
++	gfx.CheckInterrupts();
+     return;
+   }
+ 
+@@ -717,7 +724,18 @@
+     unimp.close();
+   }
+ #endif
+-
++/*
++#ifdef PAULSCODE
++  if (frameSkipper.willSkipNext())
++  {
++	*gfx.MI_INTR_REG |= 0x20;
++	gfx.CheckInterrupts();
++	*gfx.MI_INTR_REG |= 0x01;
++	gfx.CheckInterrupts();
++	return;
++  }
++#endif
++*/
+   //* Set states *//
+   if (settings.swapmode > 0)
+     SwapOK = TRUE;
+@@ -818,7 +836,7 @@
+         rdp.pc[rdp.pc_i] = (a+8) & BMASK;
+ 
+ #ifdef PERFORMANCE
+-        perf_cur = wxDateTime::UNow();
++        perf_cur = ticksGetTicks();
+ #endif
+         // Process this instruction
+         gfx_instruction[settings.ucode][rdp.cmd0>>24] ();
+@@ -837,9 +855,13 @@
+         }
+ 
+ #ifdef PERFORMANCE
+-        perf_next = wxDateTime::UNow();
+-        sprintf (out_buf, "perf %08lx: %016I64d\n", a-8, (perf_next-perf_cur).Format(_T("%l")).mb_str());
++        perf_next = ticksGetTicks();
++        sprintf (out_buf, "perf %08x: %lli\n", a-8, (perf_next-perf_cur));
++#ifdef RDP_LOGGING
+         rdp_log << out_buf;
++#else
++		printf("%s", out_buf);	// print via a literal format; passing out_buf as the format string trips -Wformat-security
++#endif
+ #endif
+ 
+       } while (!rdp.halt);
+diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/Util.cpp ./Glide64/Util.cpp
+--- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/Util.cpp	2013-09-06 22:05:29.000000000 +0200
++++ ./Glide64/Util.cpp	2013-09-08 12:39:52.000000000 +0200
+@@ -289,29 +289,29 @@
+   deltaZ = dzdx = 0;
+   if (linew == 0 && (fb_depth_render_enabled || (rdp.rm & 0xC00) == 0xC00))
+   {
+-    double X0 = vtx[0]->sx / rdp.scale_x;
+-    double Y0 = vtx[0]->sy / rdp.scale_y;
+-    double X1 = vtx[1]->sx / rdp.scale_x;
+-    double Y1 = vtx[1]->sy / rdp.scale_y;
+-    double X2 = vtx[2]->sx / rdp.scale_x;
+-    double Y2 = vtx[2]->sy / rdp.scale_y;
+-    double diffy_02 = Y0 - Y2;
+-    double diffy_12 = Y1 - Y2;
+-    double diffx_02 = X0 - X2;
+-    double diffx_12 = X1 - X2;
+-
+-    double denom = (diffx_02 * diffy_12 - diffx_12 * diffy_02);
+-    if(denom*denom > 0.0)
+-    {
+-      double diffz_02 = vtx[0]->sz - vtx[2]->sz;
+-      double diffz_12 = vtx[1]->sz - vtx[2]->sz;
+-      double fdzdx = (diffz_02 * diffy_12 - diffz_12 * diffy_02) / denom;
++    float X0 = vtx[0]->sx / rdp.scale_x;
++    float Y0 = vtx[0]->sy / rdp.scale_y;
++    float X1 = vtx[1]->sx / rdp.scale_x;
++    float Y1 = vtx[1]->sy / rdp.scale_y;
++    float X2 = vtx[2]->sx / rdp.scale_x;
++    float Y2 = vtx[2]->sy / rdp.scale_y;
++    float diffy_02 = Y0 - Y2;
++    float diffy_12 = Y1 - Y2;
++    float diffx_02 = X0 - X2;
++    float diffx_12 = X1 - X2;
++
++    float denom = (diffx_02 * diffy_12 - diffx_12 * diffy_02);
++    if(denom*denom > 0.0f)
++    {
++      float diffz_02 = vtx[0]->sz - vtx[2]->sz;
++      float diffz_12 = vtx[1]->sz - vtx[2]->sz;
++      float fdzdx = (diffz_02 * diffy_12 - diffz_12 * diffy_02) / denom;
+       if ((rdp.rm & 0xC00) == 0xC00) {
+         // Calculate deltaZ per polygon for Decal z-mode
+-        double fdzdy = (diffz_02 * diffx_12 - diffz_12 * diffx_02) / denom;
+-        double fdz = fabs(fdzdx) + fabs(fdzdy);
++        float fdzdy = (diffz_02 * diffx_12 - diffz_12 * diffx_02) / denom;
++        float fdz = fabs(fdzdx) + fabs(fdzdy);
+         if ((settings.hacks & hack_Zelda) && (rdp.rm & 0x800))
+-          fdz *= 4.0;  // Decal mode in Zelda sometimes needs mutiplied deltaZ to work correct, e.g. roads
++          fdz *= 4.0f;  // Decal mode in Zelda sometimes needs mutiplied deltaZ to work correct, e.g. roads
+         deltaZ = max(8, (int)fdz);
+       }
+       dzdx = (int)(fdzdx * 65536.0);
+@@ -881,12 +881,12 @@
+ //*/
+ 
+ typedef struct {
+-  double d;
+-  double x;
+-  double y;
++  float d;		//*SEB* was doubles
++  float x;
++  float y;
+ } LineEuqationType;
+ 
+-static double EvaLine(LineEuqationType &li, double x, double y)
++static float EvaLine(LineEuqationType &li, float x, float y)	//*SEB* all double before
+ {
+   return li.x*x+li.y*y+li.d;
+ }
+@@ -906,7 +906,7 @@
+ }
+ 
+ 
+-__inline double interp3p(float a, float b, float c, double r1, double r2)
++__inline float interp3p(float a, float b, float c, float r1, float r2)	//*SEB* r1 and r2 and function was double
+ {
+   return (a)+(((b)+((c)-(b))*(r2))-(a))*(r1);
+ }
+@@ -915,34 +915,34 @@
+   (a+(((b)+((c)-(b))*(r2))-(a))*(r1))
+ */
+ 
+-static void InterpolateColors3(VERTEX &v1, VERTEX &v2, VERTEX &v3, VERTEX &out)
++static void InterpolateColors3(VERTEX &v1, VERTEX &v2, VERTEX &v3, VERTEX &out)	//*SEB* all double before
+ {
+ 
+   LineEuqationType line;
+   Create1LineEq(line, v2, v3, v1);
+ 
+-  double aDot = (out.x*line.x + out.y*line.y);
+-  double bDot = (v1.sx*line.x + v1.sy*line.y);
++  float aDot = (out.x*line.x + out.y*line.y);
++  float bDot = (v1.sx*line.x + v1.sy*line.y);
+ 
+-  double scale1 = ( - line.d - aDot) / ( bDot - aDot );
++  float scale1 = ( - line.d - aDot) / ( bDot - aDot );
+ 
+-  double tx = out.x + scale1 * (v1.sx - out.x);
+-  double ty = out.y + scale1 * (v1.sy - out.y);
++  float tx = out.x + scale1 * (v1.sx - out.x);
++  float ty = out.y + scale1 * (v1.sy - out.y);
+ 
+-  double s1 = 101.0, s2 = 101.0;
+-  double den = tx - v1.sx;
+-  if (fabs(den) > 1.0)
++  float s1 = 101.0, s2 = 101.0;
++  float den = tx - v1.sx;
++  if (fabsf(den) > 1.0)
+     s1 = (out.x-v1.sx)/den;
+   if (s1 > 100.0f)
+     s1 = (out.y-v1.sy)/(ty-v1.sy);
+ 
+   den = v3.sx - v2.sx;
+-  if (fabs(den) > 1.0)
++  if (fabsf(den) > 1.0)
+     s2 = (tx-v2.sx)/den;
+   if (s2 > 100.0f)
+     s2 =(ty-v2.sy)/(v3.sy-v2.sy);
+ 
+-  double w = 1.0/interp3p(v1.oow,v2.oow,v3.oow,s1,s2);
++  float w = 1.0/interp3p(v1.oow,v2.oow,v3.oow,s1,s2);
+ 
+   out.r = real_to_char(interp3p(v1.r*v1.oow,v2.r*v2.oow,v3.r*v3.oow,s1,s2)*w);
+   out.g = real_to_char(interp3p(v1.g*v1.oow,v2.g*v2.oow,v3.g*v3.oow,s1,s2)*w);
+@@ -976,8 +976,8 @@
+   */
+   float deltaS, deltaT;
+   float deltaX, deltaY;
+-  double deltaTexels, deltaPixels, lodFactor = 0;
+-  double intptr;
++  float deltaTexels, deltaPixels, lodFactor = 0;	//*SEB* double before
++  float intptr;										//*SEB* double before
+   float s_scale = rdp.tiles[rdp.cur_tile].width / 255.0f;
+   float t_scale = rdp.tiles[rdp.cur_tile].height / 255.0f;
+   if (settings.lodmode == 1)
+@@ -1019,7 +1019,7 @@
+   float lod_fraction = 1.0f;
+   if (lod_tile < rdp.cur_tile + rdp.mipmap_level)
+   {
+-  	lod_fraction = max((float)modf(lodFactor / pow(2.,lod_tile),&intptr), rdp.prim_lodmin / 255.0f);
++  	lod_fraction = max((float)modff(lodFactor / powf(2.,lod_tile),&intptr), (float)rdp.prim_lodmin / 255.0f);
+   }
+   float detailmax;
+   if (cmb.dc0_detailmax < 0.5f)
+diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/GlideHQ/TxDbg.cpp ./GlideHQ/TxDbg.cpp
+--- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/GlideHQ/TxDbg.cpp	2013-09-06 22:05:30.000000000 +0200
++++ ./GlideHQ/TxDbg.cpp	2013-09-07 12:06:11.000000000 +0200
+@@ -28,6 +28,8 @@
+ #include <stdarg.h>
+ #include <string>
+ 
++#define _GLIBCXX_HAVE_BROKEN_VSWPRINTF	1
++
+ TxDbg::TxDbg()
+ {
+   _level = DBG_LEVEL;
+diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glitch64/combiner.cpp ./Glitch64/combiner.cpp
+--- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glitch64/combiner.cpp	2013-09-06 22:05:30.000000000 +0200
++++ ./Glitch64/combiner.cpp	2013-09-14 10:16:36.000000000 +0200
+@@ -29,6 +29,8 @@
+ #include "glide.h"
+ #include "main.h"
+ 
++#define GLchar	char
++
+ void vbo_draw();
+ 
+ static int fct[4], source0[4], operand0[4], source1[4], operand1[4], source2[4], operand2[4];
+@@ -117,10 +119,11 @@
+ // using gl_FragCoord is terribly slow on ATI and varying variables don't work for some unknown
+ // reason, so we use the unused components of the texture2 coordinates
+ static const char* fragment_shader_dither =
+-"  float dithx = (gl_TexCoord[2].b + 1.0)*0.5*1000.0; \n"
++" \n"
++/*"  float dithx = (gl_TexCoord[2].b + 1.0)*0.5*1000.0; \n"
+ "  float dithy = (gl_TexCoord[2].a + 1.0)*0.5*1000.0; \n"
+ "  if(texture2D(ditherTex, vec2((dithx-32.0*floor(dithx/32.0))/32.0, \n"
+-"                               (dithy-32.0*floor(dithy/32.0))/32.0)).a > 0.5) discard; \n"
++"                               (dithy-32.0*floor(dithy/32.0))/32.0)).a > 0.5) discard; \n"*/
+ ;
+ 
+ static const char* fragment_shader_default =
+@@ -165,11 +168,16 @@
+ "}                               \n"
+ ;
+ 
++static const char* fragment_shader_alt_end =
++"                                \n"
++"}                               \n"
++;
++
+ static const char* vertex_shader =
+ SHADER_HEADER
+ "#define Z_MAX 65536.0                                          \n"
+ "attribute highp vec4 aVertex;                                  \n"
+-"attribute highp vec4 aColor;                                   \n"
++"attribute mediump vec4 aColor;                                   \n"	//*SEB* highp -> lowp
+ "attribute highp vec4 aMultiTexCoord0;                          \n"
+ "attribute highp vec4 aMultiTexCoord1;                          \n"
+ "attribute float aFog;                                          \n"
+@@ -245,7 +253,7 @@
+ 
+   // creating a fake texture
+   glBindTexture(GL_TEXTURE_2D, default_texture);
+-  glTexImage2D(GL_TEXTURE_2D, 0, 3, 2, 2, 0, GL_RGBA, GL_UNSIGNED_BYTE, texture);
++  glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 2, 2, 0, GL_RGBA, GL_UNSIGNED_BYTE, texture);
+   glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+   glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+ 
+@@ -286,7 +294,7 @@
+     strlen(fragment_shader_end)+1);
+   strcpy(fragment_shader, fragment_shader_header);
+   strcat(fragment_shader, fragment_shader_default);
+-  strcat(fragment_shader, fragment_shader_end);
++  strcat(fragment_shader, fragment_shader_end);	/*SEB*/
+   glShaderSource(fragment_shader_object, 1, (const GLchar**)&fragment_shader, NULL);
+   free(fragment_shader);
+ 
+@@ -408,6 +416,7 @@
+   int dither_enabled;
+   int blackandwhite0;
+   int blackandwhite1;
++  int alpha_test;			//*SEB*
+   GLuint fragment_shader_object;
+   GLuint program_object;
+   int texture0_location;
+@@ -489,6 +498,8 @@
+   int i;
+   int chroma_color_location;
+   int log_length;
++  
++  int noalpha;
+ 
+   need_to_compile = 0;
+ 
+@@ -502,6 +513,7 @@
+       prog.texture0_combinera == texture0_combinera_key &&
+       prog.texture1_combinera == texture1_combinera_key &&
+       prog.fog_enabled == fog_enabled &&
++	  prog.alpha_test == alpha_test &&				//*SEB*
+       prog.chroma_enabled == chroma_enabled &&
+       prog.dither_enabled == dither_enabled &&
+       prog.blackandwhite0 == blackandwhite0 &&
+@@ -514,11 +526,13 @@
+     }
+   }
+ 
+-  if(shader_programs != NULL)
+-    shader_programs = (shader_program_key*)realloc(shader_programs, (number_of_programs+1)*sizeof(shader_program_key));
++  if(shader_programs != NULL) {
++	if ((number_of_programs+1)>1024)
++		shader_programs = (shader_program_key*)realloc(shader_programs, (number_of_programs+1)*sizeof(shader_program_key));
++  }
+   else
+-    shader_programs = (shader_program_key*)malloc(sizeof(shader_program_key));
+-  //printf("number of shaders %d\n", number_of_programs);
++    shader_programs = (shader_program_key*)malloc(sizeof(shader_program_key)*1024);
++	//printf("number of shaders %d\n", number_of_programs);
+ 
+   shader_programs[number_of_programs].color_combiner = color_combiner_key;
+   shader_programs[number_of_programs].alpha_combiner = alpha_combiner_key;
+@@ -531,6 +545,7 @@
+   shader_programs[number_of_programs].dither_enabled = dither_enabled;
+   shader_programs[number_of_programs].blackandwhite0 = blackandwhite0;
+   shader_programs[number_of_programs].blackandwhite1 = blackandwhite1;
++  shader_programs[number_of_programs].alpha_test = alpha_test;		//*SEB*
+ 
+   if(chroma_enabled)
+   {
+@@ -557,7 +572,10 @@
+   strcat(fragment_shader, fragment_shader_color_combiner);
+   strcat(fragment_shader, fragment_shader_alpha_combiner);
+   if(fog_enabled) strcat(fragment_shader, fragment_shader_fog);
+-  strcat(fragment_shader, fragment_shader_end);
++  if (alpha_test)
++		strcat(fragment_shader, fragment_shader_end);
++  else
++		strcat(fragment_shader, fragment_shader_alt_end);		//*SEB*
+   if(chroma_enabled) strcat(fragment_shader, fragment_shader_chroma);
+ 
+   shader_programs[number_of_programs].fragment_shader_object = glCreateShader(GL_FRAGMENT_SHADER);
+@@ -1719,7 +1737,7 @@
+   glActiveTexture(GL_TEXTURE2);
+   glEnable(GL_TEXTURE_2D);
+   glBindTexture(GL_TEXTURE_2D, 33*1024*1024);
+-  glTexImage2D(GL_TEXTURE_2D, 0, 4, 32, 32, 0, GL_RGBA, GL_UNSIGNED_BYTE, texture);
++  glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 32, 32, 0, GL_RGBA, GL_UNSIGNED_BYTE, texture);
+   glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+   glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+   glDisable(GL_TEXTURE_2D);
+diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glitch64/geometry.cpp ./Glitch64/geometry.cpp
+--- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glitch64/geometry.cpp	2013-09-06 22:05:30.000000000 +0200
++++ ./Glitch64/geometry.cpp	2013-09-12 22:13:33.000000000 +0200
+@@ -34,7 +34,7 @@
+ #define VERTEX_SIZE sizeof(VERTEX) //Size of vertex struct
+ 
+ #ifdef PAULSCODE
+-#include "ae_bridge.h"
++//#include "ae_bridge.h"
+ static float polygonOffsetFactor;
+ static float polygonOffsetUnits;
+ #endif
+@@ -338,8 +338,11 @@
+ void FindBestDepthBias()
+ {
+ #ifdef PAULSCODE
+-  int hardwareType = Android_JNI_GetHardwareType();
+-  Android_JNI_GetPolygonOffset(hardwareType, 1, &polygonOffsetFactor, &polygonOffsetUnits);
++/*  int hardwareType = Android_JNI_GetHardwareType();
++  Android_JNI_GetPolygonOffset(hardwareType, 1, &polygonOffsetFactor, &polygonOffsetUnits);*/
++//  glPolygonOffset(0.2f, 0.2f);
++	polygonOffsetFactor=0.2f;
++	polygonOffsetUnits=0.2f;
+ #else
+   float f, bestz = 0.25f;
+   int x;
+@@ -386,7 +389,11 @@
+   if (level)
+   {
+     #ifdef PAULSCODE
+-    glPolygonOffset(polygonOffsetFactor, polygonOffsetUnits);
++//    glPolygonOffset(polygonOffsetFactor, polygonOffsetUnits);
++    if(w_buffer_mode)
++      glPolygonOffset(1.0f, -(float)level*polygonOffsetUnits);
++    else
++      glPolygonOffset(0, (float)level*3.0f);
+     #else
+     if(w_buffer_mode)
+       glPolygonOffset(1.0f, -(float)level*zscale/255.0f);
+@@ -408,13 +415,13 @@
+ grDrawTriangle( const void *a, const void *b, const void *c )
+ {
+   LOG("grDrawTriangle()\r\n\t");
+-  
++/*  
+   if(nvidia_viewport_hack && !render_to_texture)
+   {
+     glViewport(0, viewport_offset, viewport_width, viewport_height);
+     nvidia_viewport_hack = 0;
+   }
+-
++*/
+   reloadTexture();
+ 
+   if(need_to_compile) compile_shader();
+@@ -588,13 +595,13 @@
+ {
+   void **pointers = (void**)pointers2;
+   LOG("grDrawVertexArray(%d,%d)\r\n", mode, Count);
+-
++/*
+   if(nvidia_viewport_hack && !render_to_texture)
+   {
+     glViewport(0, viewport_offset, viewport_width, viewport_height);
+     nvidia_viewport_hack = 0;
+   }
+-
++*/
+   reloadTexture();
+ 
+   if(need_to_compile) compile_shader();
+@@ -612,13 +619,13 @@
+ grDrawVertexArrayContiguous(FxU32 mode, FxU32 Count, void *pointers, FxU32 stride)
+ {
+   LOG("grDrawVertexArrayContiguous(%d,%d,%d)\r\n", mode, Count, stride);
+-
++/*
+   if(nvidia_viewport_hack && !render_to_texture)
+   {
+     glViewport(0, viewport_offset, viewport_width, viewport_height);
+     nvidia_viewport_hack = 0;
+   }
+-
++*/
+   if(stride != 156)
+   {
+ 	  LOGINFO("Incompatible stride\n");
+diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glitch64/glitchmain.cpp ./Glitch64/glitchmain.cpp
+--- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glitch64/glitchmain.cpp	2013-09-06 22:05:30.000000000 +0200
++++ ./Glitch64/glitchmain.cpp	2013-09-15 17:13:49.000000000 +0200
+@@ -656,6 +656,9 @@
+ #ifdef _WIN32
+   glCompressedTexImage2DARB = (PFNGLCOMPRESSEDTEXIMAGE2DPROC)wglGetProcAddress("glCompressedTexImage2DARB");
+ #endif
++/*SEB*/
++  glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
++  glPixelStorei(GL_PACK_ALIGNMENT, 1);
+ 
+ 
+ #ifdef _WIN32
+@@ -806,6 +809,7 @@
+     fullscreen = 0;
+   }
+ #else
++  CoreVideo_Quit();
+   //SDL_QuitSubSystem(SDL_INIT_VIDEO);
+   //sleep(2);
+ #endif
+@@ -823,7 +827,7 @@
+   int i;
+   static int fbs_init = 0;
+ 
+-  //printf("grTextureBufferExt(%d, %d, %d, %d, %d, %d, %d)\r\n", tmu, startAddress, lodmin, lodmax, aspect, fmt, evenOdd);
++	//printf("grTextureBufferExt(%d, %d, %d, %d, %d, %d, %d)\r\n", tmu, startAddress, lodmin, lodmax, aspect, fmt, evenOdd);
+   LOG("grTextureBufferExt(%d, %d, %d, %d %d, %d, %d)\r\n", tmu, startAddress, lodmin, lodmax, aspect, fmt, evenOdd);
+   if (lodmin != lodmax) display_warning("grTextureBufferExt : loading more than one LOD");
+   if (!use_fbo) {
+@@ -907,8 +911,8 @@
+       tmu_usage[rtmu].min = pBufferAddress;
+     if ((unsigned int) tmu_usage[rtmu].max < pBufferAddress+size)
+       tmu_usage[rtmu].max = pBufferAddress+size;
+-    //   printf("tmu %d usage now %gMb - %gMb\n",
+-    //          rtmu, tmu_usage[rtmu].min/1024.0f, tmu_usage[rtmu].max/1024.0f);
++	//printf("tmu %d usage now %gMb - %gMb\n",
++    //      rtmu, tmu_usage[rtmu].min/1024.0f, tmu_usage[rtmu].max/1024.0f);
+ 
+ 
+     width = pBufferWidth;
+@@ -927,14 +931,14 @@
+     texbufs[i].fmt = fmt;
+     if (i == texbuf_i)
+       texbuf_i = (texbuf_i+1)&(NB_TEXBUFS-1);
+-    //printf("texbuf %x fmt %x\n", pBufferAddress, fmt);
++	//printf("texbuf %x fmt %x\n", pBufferAddress, fmt);
+ 
+     // ZIGGY it speeds things up to not delete the buffers
+     // a better thing would be to delete them *sometimes*
+     //   remove_tex(pBufferAddress+1, pBufferAddress + size);
+     add_tex(pBufferAddress);
+ 
+-    //printf("viewport %dx%d\n", width, height);
++	//printf("viewport %dx%d\n", width, height);
+     if (height > screen_height) {
+       glViewport( 0, viewport_offset + screen_height - height, width, height);
+     } else
+@@ -1009,7 +1013,6 @@
+         }
+       }
+     }
+-
+     remove_tex(pBufferAddress, pBufferAddress + width*height*2/*grTexFormatSize(fmt)*/);
+     //create new FBO
+     glGenFramebuffers( 1, &(fbs[nb_fb].fbid) );
+@@ -1768,6 +1771,7 @@
+           GrLfbInfo_t *info )
+ {
+   LOG("grLfbLock(%d,%d,%d,%d,%d)\r\n", type, buffer, writeMode, origin, pixelPipeline);
++//printf("grLfbLock(%d,%d,%d,%d,%d)\r\n", type, buffer, writeMode, origin, pixelPipeline);
+   if (type == GR_LFB_WRITE_ONLY)
+   {
+     display_warning("grLfbLock : write only");
+@@ -1792,12 +1796,32 @@
+     if(buffer != GR_BUFFER_AUXBUFFER)
+     {
+       if (writeMode == GR_LFBWRITEMODE_888) {
++/*SEB*/
++        buf = (unsigned char*)malloc(width*height*4);
+         //printf("LfbLock GR_LFBWRITEMODE_888\n");
+         info->lfbPtr = frameBuffer;
+         info->strideInBytes = width*4;
+         info->writeMode = GR_LFBWRITEMODE_888;
+         info->origin = origin;
+         //glReadPixels(0, viewport_offset, width, height, GL_BGRA, GL_UNSIGNED_BYTE, frameBuffer);
++        glReadPixels(0, viewport_offset, width, height, GL_RGBA, GL_UNSIGNED_BYTE, buf);
++
++/*SEB*/
++	    unsigned char *p=buf;
++        for (j=0; j<height; j++)
++        {
++	    short unsigned int *f=frameBuffer+(height-j-1)*width;
++          for (i=0; i<width; i++)
++          {
++            *(f++) =
++              (*(p)   <<24) |
++              (*(p+1) <<16) |
++              (*(p+2) << 8) |
++	          (0xff);
++              p+=4;
++          }
++        }
++        free(buf);
+       } else {
+         buf = (unsigned char*)malloc(width*height*4);
+ 
+@@ -1807,14 +1831,22 @@
+         info->origin = origin;
+         glReadPixels(0, viewport_offset, width, height, GL_RGBA, GL_UNSIGNED_BYTE, buf);
+ 
++/*SEB*/
++	    unsigned char *p=buf;
+         for (j=0; j<height; j++)
+         {
++	      short unsigned int *f=frameBuffer+(height-j-1)*width;
+           for (i=0; i<width; i++)
+           {
+-            frameBuffer[(height-j-1)*width+i] =
++/*            frameBuffer[(height-j-1)*width+i] =
+               ((buf[j*width*4+i*4+0] >> 3) << 11) |
+               ((buf[j*width*4+i*4+1] >> 2) <<  5) |
+-              (buf[j*width*4+i*4+2] >> 3);
++              (buf[j*width*4+i*4+2] >> 3);*/
++            *(f++) =
++              ((*(p)   >> 3) << 11) |
++              ((*(p+1) >> 2) <<  5) |
++              (*(p+2)  >> 3);
++              p+=4;
+           }
+         }
+         free(buf);
+@@ -1826,6 +1858,7 @@
+       info->strideInBytes = width*2;
+       info->writeMode = GR_LFBWRITEMODE_ZA16;
+       info->origin = origin;
++      //*SEB* *TODO* check alignment
+       glReadPixels(0, viewport_offset, width, height, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, depthBuffer);
+     }
+   }
+@@ -1855,6 +1888,7 @@
+   unsigned short *frameBuffer = (unsigned short*)dst_data;
+   unsigned short *depthBuffer = (unsigned short*)dst_data;
+   LOG("grLfbReadRegion(%d,%d,%d,%d,%d,%d)\r\n", src_buffer, src_x, src_y, src_width, src_height, dst_stride);
++//printf("grLfbReadRegion(%d,%d,%d,%d,%d,%d)\r\n", src_buffer, src_x, src_y, src_width, src_height, dst_stride);
+ 
+   switch(src_buffer)
+   {
+@@ -1876,15 +1910,22 @@
+     buf = (unsigned char*)malloc(src_width*src_height*4);
+ 
+     glReadPixels(src_x, (viewport_offset)+height-src_y-src_height, src_width, src_height, GL_RGBA, GL_UNSIGNED_BYTE, buf);
+-
+     for (j=0; j<src_height; j++)
+     {
++/*SEB*/
++      unsigned char *p=buf+(src_height-j-1)*src_width*4;
++      unsigned short *f=frameBuffer+(j*dst_stride/2);
+       for (i=0; i<src_width; i++)
+       {
+-        frameBuffer[j*(dst_stride/2)+i] =
++/*        frameBuffer[j*(dst_stride/2)+i] =
+           ((buf[(src_height-j-1)*src_width*4+i*4+0] >> 3) << 11) |
+           ((buf[(src_height-j-1)*src_width*4+i*4+1] >> 2) <<  5) |
+-          (buf[(src_height-j-1)*src_width*4+i*4+2] >> 3);
++          (buf[(src_height-j-1)*src_width*4+i*4+2] >> 3);*/
++        *(f++) =
++          ((*(p) >> 3) << 11) |
++          ((*(p+1) >> 2) <<  5) |
++          (*(p+2) >> 3);
++	  p+=4;
+       }
+     }
+     free(buf);
+@@ -1892,15 +1933,19 @@
+   else
+   {
+     buf = (unsigned char*)malloc(src_width*src_height*2);
+-
+-    glReadPixels(src_x, (viewport_offset)+height-src_y-src_height, src_width, src_height, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, depthBuffer);
++//*SEB read in buf, not depthBuffer.
++    glReadPixels(src_x, (viewport_offset)+height-src_y-src_height, src_width, src_height, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, buf);
+ 
+     for (j=0;j<src_height; j++)
+     {
++//*SEB*
++      unsigned short *d=depthBuffer+j*dst_stride/2;
++      unsigned short *p=(unsigned short*)buf+(src_height-j-1)*src_width; //orignal look fishy. why *4???
+       for (i=0; i<src_width; i++)
+       {
+-        depthBuffer[j*(dst_stride/2)+i] =
+-          ((unsigned short*)buf)[(src_height-j-1)*src_width*4+i*4];
++/*        depthBuffer[j*(dst_stride/2)+i] =
++          ((unsigned short*)buf)[(src_height-j-1)*src_width*4+i*4];*/
++        *(d++) = *(p++); //why *4 (prob. GL_PACK was=4), plus transcoding to short, that make *8 ???
+       }
+     }
+     free(buf);
+@@ -1923,6 +1968,7 @@
+   int texture_number;
+   unsigned int tex_width = 1, tex_height = 1;
+   LOG("grLfbWriteRegion(%d,%d,%d,%d,%d,%d,%d,%d)\r\n",dst_buffer, dst_x, dst_y, src_format, src_width, src_height, pixelPipeline, src_stride);
++//printf("grLfbWriteRegion(%d,%d,%d,%d,%d,%d,%d,%d)\r\n",dst_buffer, dst_x, dst_y, src_format, src_width, src_height, pixelPipeline, src_stride);
+ 
+   //glPushAttrib(GL_ALL_ATTRIB_BITS);
+ 
+@@ -1949,6 +1995,12 @@
+     glActiveTexture(texture_number);
+ 
+     const unsigned int half_stride = src_stride / 2;
++
++    const int comp_stride = half_stride - src_width;
++    const int comp_tex = (tex_width - src_width)*4;
++    unsigned short *f=frameBuffer;
++    unsigned char *p=buf;
++
+     switch(src_format)
+     {
+     case GR_LFB_SRC_FMT_1555:
+@@ -1956,12 +2008,20 @@
+       {
+         for (i=0; i<src_width; i++)
+         {
+-          const unsigned int col = frameBuffer[j*half_stride+i];
++/*          const unsigned int col = frameBuffer[j*half_stride+i];
+           buf[j*tex_width*4+i*4+0]=((col>>10)&0x1F)<<3;
+           buf[j*tex_width*4+i*4+1]=((col>>5)&0x1F)<<3;
+           buf[j*tex_width*4+i*4+2]=((col>>0)&0x1F)<<3;
+-          buf[j*tex_width*4+i*4+3]= (col>>15) ? 0xFF : 0;
++          buf[j*tex_width*4+i*4+3]= (col>>15) ? 0xFF : 0;*/
++          const unsigned int col = *(f++);
++          *(p)=((col>>10)&0x1F)<<3;
++          *(p+1)=((col>>5)&0x1F)<<3;
++          *(p+2)=((col>>0)&0x1F)<<3;
++          *(p+3)= (col>>15) ? 0xFF : 0;
++	  p+=4;
+         }
++	p+=comp_tex;
++	f+=comp_stride;
+       }
+       break;
+     case GR_LFBWRITEMODE_555:
+@@ -1969,12 +2029,20 @@
+       {
+         for (i=0; i<src_width; i++)
+         {
+-          const unsigned int col = frameBuffer[j*half_stride+i];
++/*          const unsigned int col = frameBuffer[j*half_stride+i];
+           buf[j*tex_width*4+i*4+0]=((col>>10)&0x1F)<<3;
+           buf[j*tex_width*4+i*4+1]=((col>>5)&0x1F)<<3;
+           buf[j*tex_width*4+i*4+2]=((col>>0)&0x1F)<<3;
+-          buf[j*tex_width*4+i*4+3]=0xFF;
++          buf[j*tex_width*4+i*4+3]=0xFF;*/
++          const unsigned int col = *(f++);
++          *(p)=((col>>10)&0x1F)<<3;
++          *(p+1)=((col>>5)&0x1F)<<3;
++          *(p+2)=((col>>0)&0x1F)<<3;
++          *(p+3)=0xFF;
++	  p+=4;
+         }
++	p+=comp_tex;
++	f+=comp_stride;
+       }
+       break;
+     case GR_LFBWRITEMODE_565:
+@@ -1982,12 +2050,20 @@
+       {
+         for (i=0; i<src_width; i++)
+         {
+-          const unsigned int col = frameBuffer[j*half_stride+i];
++/*          const unsigned int col = frameBuffer[j*half_stride+i];
+           buf[j*tex_width*4+i*4+0]=((col>>11)&0x1F)<<3;
+           buf[j*tex_width*4+i*4+1]=((col>>5)&0x3F)<<2;
+           buf[j*tex_width*4+i*4+2]=((col>>0)&0x1F)<<3;
+-          buf[j*tex_width*4+i*4+3]=0xFF;
++          buf[j*tex_width*4+i*4+3]=0xFF;*/
++          const unsigned int col = *(f++);
++          *(p)=((col>>11)&0x1F)<<3;
++          *(p+1)=((col>>5)&0x3F)<<2;
++          *(p+2)=((col>>0)&0x1F)<<3;
++          *(p+3)=0xFF;
++	  p+=4;
+         }
++	p+=comp_tex;
++	f+=comp_stride;
+       }
+       break;
+     default:
+@@ -2006,7 +2082,7 @@
+ #endif
+ 
+     glBindTexture(GL_TEXTURE_2D, default_texture);
+-    glTexImage2D(GL_TEXTURE_2D, 0, 4, tex_width, tex_height, 0, GL_RGBA, GL_UNSIGNED_BYTE, buf);
++    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex_width, tex_height, 0, GL_RGBA, GL_UNSIGNED_BYTE, buf);
+     free(buf);
+ 
+     set_copy_shader();
+diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glitch64/textures.cpp ./Glitch64/textures.cpp
+--- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glitch64/textures.cpp	2013-09-06 22:05:31.000000000 +0200
++++ ./Glitch64/textures.cpp	2013-09-13 11:32:50.000000000 +0200
+@@ -26,6 +26,7 @@
+ #include "glide.h"
+ #include "main.h"
+ #include <stdio.h>
++#include <string.h>
+ 
+ /* Napalm extensions to GrTextureFormat_t */
+ #define GR_TEXFMT_ARGB_CMP_FXT1           0x11
+@@ -107,7 +108,7 @@
+   }
+   glDeleteTextures(n, t);
+   free(t);
+-  //printf("RMVTEX nbtex is now %d (%06x - %06x)\n", nbTex, idmin, idmax);
++//printf("RMVTEX nbtex is now %d (%06x - %06x)\n", nbTex, idmin, idmax);
+ }
+ 
+ 
+@@ -115,7 +116,7 @@
+ {
+   texlist *aux = list;
+   texlist *aux2;
+-  //printf("ADDTEX nbtex is now %d (%06x)\n", nbTex, id);
++//printf("ADDTEX nbtex is now %d (%06x)\n", nbTex, id);
+   if (list == NULL || id < list->id)
+   {
+     nbTex++;
+@@ -435,8 +436,11 @@
+     factor = -1;
+   else
+     factor = grTexFormat2GLPackedFmt(info->format, &gltexfmt, &glpixfmt, &glpackfmt);
+-
++//printf("grTexDownloadMipmap, id=%x, size=%ix%i, format=%x\n", startAddress+1, width, height, info->format);
+   if (factor < 0) {
++    gltexfmt = GL_RGBA;
++    glpixfmt = GL_RGBA;
++    glpackfmt = GL_UNSIGNED_BYTE;
+ 
+     // VP fixed the texture conversions to be more accurate, also swapped
+     // the for i/j loops so that is is less likely to break the memory cache
+@@ -444,7 +448,7 @@
+     switch(info->format)
+     {
+     case GR_TEXFMT_ALPHA_8:
+-      for (i=0; i<height; i++)
++ /*     for (i=0; i<height; i++)
+       {
+         for (j=0; j<width; j++)
+         {
+@@ -457,10 +461,25 @@
+         }
+       }
+       factor = 1;
+-      glformat = GL_RGBA;
++      glformat = GL_RGBA;*/
++
++     for (i=0; i<height; i++)
++      {
++        for (j=0; j<width; j++)
++        {
++          unsigned short texel = (unsigned short)((unsigned char*)info->data)[m];
++          ((unsigned short*)texture)[n] = texel|(texel<<8);
++          m++;
++          n++;
++        }
++      }
++
++      glformat = gltexfmt = glpixfmt = GL_LUMINANCE_ALPHA;
++      glpackfmt = GL_UNSIGNED_BYTE;
++      factor = 1;
+       break;
+     case GR_TEXFMT_INTENSITY_8: // I8 support - H.Morii
+-      for (i=0; i<height; i++)
++/*      for (i=0; i<height; i++)
+       {
+         for (j=0; j<width; j++)
+         {
+@@ -470,9 +489,13 @@
+           m++;
+           n++;
+         }
+-      }
++      }*/
++      factor = 1;
++//      glformat = GL_ALPHA;
++      memcpy(texture, info->data, width*height);
++      glformat = gltexfmt = glpixfmt = GL_LUMINANCE;
++      glpackfmt = GL_UNSIGNED_BYTE;
+       factor = 1;
+-      glformat = GL_ALPHA;
+       break;
+     case GR_TEXFMT_ALPHA_INTENSITY_44:
+ #if 1
+@@ -480,9 +503,9 @@
+       {
+         for (j=0; j<width; j++)
+         {
+-          unsigned int texel = (unsigned int)((unsigned char*)info->data)[m];
++/*          unsigned int texel = (unsigned int)((unsigned char*)info->data)[m];
+ #if 1
+-          /* accurate conversion */
++          // accurate conversion
+           unsigned int texel_hi = (texel & 0x000000F0) << 20;
+           unsigned int texel_low = texel & 0x0000000F;
+           texel_low |= (texel_low << 4);
+@@ -493,61 +516,90 @@
+           texel_hi |= ((texel_low << 16) | (texel_low << 8) | texel_low);
+ #endif
+           ((unsigned int*)texture)[n] = texel_hi;
++*/
++	  unsigned char texel = ((unsigned char*)info->data)[m];
++          unsigned short texel_hi = (texel & 0x000000F0) << 4;
++          unsigned short texel_low = texel & 0x0000000F;
++          texel_low |= (texel_low << 4);
++          texel_hi |= ((texel_hi << 4) | (texel_low));
++	  ((unsigned short*)texture)[n] = texel_hi;
+           m++;
+           n++;
+         }
+       }
+       factor = 1;
+-      glformat = GL_LUMINANCE_ALPHA;
++      glformat = gltexfmt = glpixfmt = GL_LUMINANCE_ALPHA;
++      glpackfmt = GL_UNSIGNED_BYTE;
++//      glformat = GL_LUMINANCE_ALPHA;
+ #endif
+       break;
+     case GR_TEXFMT_RGB_565:
+-      for (i=0; i<height; i++)
++/*      for (i=0; i<height; i++)
+       {
+         for (j=0; j<width; j++)
+-        {
+-          unsigned int texel = (unsigned int)((unsigned short*)info->data)[m];
++        {*/
++/*          unsigned int texel = (unsigned int)((unsigned short*)info->data)[m];
+           unsigned int B = texel & 0x0000F800;
+           unsigned int G = texel & 0x000007E0;
+           unsigned int R = texel & 0x0000001F;
+ #if 0
+-          /* accurate conversion */
++          // accurate conversion 
+           ((unsigned int*)texture)[n] = 0xFF000000 | (R << 19) | ((R >> 2) << 16) | (G << 5) | ((G >> 9) << 8) | (B >> 8) | (B >> 13);
+ #else
+           ((unsigned int*)texture)[n] = 0xFF000000 | (R << 19) | (G << 5) | (B >> 8);
+ #endif
++*/
++/*	  const unsigned short texel = ((unsigned short*)info->data)[m];
++          const unsigned short B = (texel & 0xF800)>>11;
++          const unsigned short G = texel & 0x07E0;
++          const unsigned short R = (texel & 0x001F)<<11;
++          ((unsigned short*)texture)[n] = R|G|B;
+           m++;
+           n++;
+         }
+-      }
++      }*/
++      memcpy(texture, info->data, width*height*2);
+       factor = 2;
+-      glformat = GL_RGB;
++//      glformat = GL_RGB;
++      glformat = gltexfmt = glpixfmt = GL_RGB;
++      glpackfmt = GL_UNSIGNED_SHORT_5_6_5;
+       break;
+     case GR_TEXFMT_ARGB_1555:
+       for (i=0; i<height; i++)
+       {
+         for (j=0; j<width; j++)
+         {
+-          unsigned int texel = (unsigned int)((unsigned short*)info->data)[m];
++/*          unsigned int texel = (unsigned int)((unsigned short*)info->data)[m];
+           unsigned int A = texel & 0x00008000 ? 0xFF000000 : 0;
+           unsigned int B = texel & 0x00007C00;
+           unsigned int G = texel & 0x000003E0;
+           unsigned int R = texel & 0x0000001F;
+ #if 0
+-          /* accurate conversion */
++          // accurate conversion
+           ((unsigned int*)texture)[n] = A | (R << 19) | ((R >> 2) << 16) | (G << 6) | ((G >> 8) << 8) | (B >> 7) | (B >> 12);
+ #else
+           ((unsigned int*)texture)[n] = A | (R << 19) | (G << 6) | (B >> 7);
+ #endif
++*/
++          unsigned short texel = ((unsigned short*)info->data)[m];
++          unsigned short A = (texel & 0x8000)>>15;
++ 	  ((unsigned short*)texture)[n] = A|(texel&0x7fff)<<1;
++/*
++          unsigned short B = (texel & 0x7C00)>>9;
++          unsigned short G = texel & 0x03E0<<1;
++          unsigned short R = (texel & 0x001F)<<11;
++          ((unsigned short*)texture)[n] = A|R|G|B;*/
+           m++;
+           n++;
+         }
+       }
+       factor = 2;
+-      glformat = GL_RGBA;
++//      glformat = GL_RGBA;
++      glformat = gltexfmt = glpixfmt = GL_RGBA;
++      glpackfmt = GL_UNSIGNED_SHORT_5_5_5_1;
+       break;
+     case GR_TEXFMT_ALPHA_INTENSITY_88:
+-      for (i=0; i<height; i++)
++/*      for (i=0; i<height; i++)
+       {
+         for (j=0; j<width; j++)
+         {
+@@ -557,9 +609,12 @@
+           m++;
+           n++;
+         }
+-      }
++      }*/
++      memcpy(texture, info->data, width*height*2);
+       factor = 2;
+       glformat = GL_LUMINANCE_ALPHA;
++      glformat = gltexfmt = glpixfmt = GL_LUMINANCE_ALPHA;
++      glpackfmt = GL_UNSIGNED_BYTE;
+       break;
+     case GR_TEXFMT_ARGB_4444:
+ 
+@@ -567,23 +622,29 @@
+       {
+         for (j=0; j<width; j++)
+         {
+-          unsigned int texel = (unsigned int)((unsigned short*)info->data)[m];
++/*          unsigned int texel = (unsigned int)((unsigned short*)info->data)[m];
+           unsigned int A = texel & 0x0000F000;
+           unsigned int B = texel & 0x00000F00;
+           unsigned int G = texel & 0x000000F0;
+           unsigned int R = texel & 0x0000000F;
+ #if 0
+-          /* accurate conversion */
++          // accurate conversion
+           ((unsigned int*)texture)[n] = (A << 16) | (A << 12) | (R << 20) | (R << 16) | (G << 8) | (G << 4) | (B >> 4) | (B >> 8);
+ #else
+           ((unsigned int*)texture)[n] = (A << 16) | (R << 20) | (G << 8) | (B >> 4);
+ #endif
++*/
++          unsigned short texel = ((unsigned short*)info->data)[m];
++          unsigned int A = (texel & 0xF000)>>12;
++          ((unsigned short*)texture)[n] = A|(texel&0x0fff)<<4;
+           m++;
+           n++;
+         }
+       }
+       factor = 2;
+       glformat = GL_RGBA;
++      glformat = gltexfmt = glpixfmt = GL_RGBA;
++      glpackfmt = GL_UNSIGNED_SHORT_4_4_4_4;
+       break;
+     case GR_TEXFMT_ARGB_8888:
+       for (i=0; i<height; i++)
+@@ -650,7 +711,9 @@
+   if (largest_supported_anisotropy > 1.0f)
+     glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, largest_supported_anisotropy);
+ 
+-  glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, texture);
++//*SEB*  glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, texture);
++//printf("new texture, id=%x, size=%ix%i, fmt=%x/%x\n", startAddress+1, width, height, gltexfmt, glpackfmt);
++  glTexImage2D(GL_TEXTURE_2D, 0, gltexfmt, width, height, 0, glpixfmt, glpackfmt, texture);
+ /*
+   switch(info->format)
+   {