Glide Plugin GLES2 port from mupen64plus-ae, but with special FrameSkip code
[mupen64plus-pandora.git] / source / gles2glide64 / pandora.diff
1 diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/3dmath.cpp ./Glide64/3dmath.cpp
2 --- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/3dmath.cpp      2013-09-06 22:05:28.000000000 +0200
3 +++ ./Glide64/3dmath.cpp        2013-09-14 09:41:13.000000000 +0200
4 @@ -202,15 +202,109 @@
5    }
6  }
7  
8 +#ifdef __ARM_NEON__
9 +void MultMatrix_neon( float m0[4][4], float m1[4][4], float dest[4][4])	// dest = m1 * m0 for row-major 4x4 float matrices (dest row i = sum over k of m1[i][k] * m0 row k)
10 +{
11 +    asm volatile (
12 +       "vld1.32                {d0, d1}, [%1]!                 \n\t"   //q0 = m1 row 0
13 +       "vld1.32                {d2, d3}, [%1]!         \n\t"   //q1 = m1 row 1
14 +       "vld1.32                {d4, d5}, [%1]!         \n\t"   //q2 = m1 row 2
15 +       "vld1.32                {d6, d7}, [%1]          \n\t"   //q3 = m1 row 3
16 +       "vld1.32                {d16, d17}, [%0]!               \n\t"   //q8 = m0 row 0
17 +       "vld1.32                {d18, d19}, [%0]!       \n\t"   //q9 = m0 row 1
18 +       "vld1.32                {d20, d21}, [%0]!       \n\t"   //q10 = m0 row 2
19 +       "vld1.32                {d22, d23}, [%0]        \n\t"   //q11 = m0 row 3
20 +
21 +       "vmul.f32               q12, q8, d0[0]                  \n\t"   //q12 = q8 * d0[0]
22 +       "vmul.f32               q13, q8, d2[0]              \n\t"       //q13 = q8 * d2[0]
23 +       "vmul.f32               q14, q8, d4[0]              \n\t"       //q14 = q8 * d4[0]
24 +       "vmul.f32               q15, q8, d6[0]                  \n\t"   //q15 = q8 * d6[0]
25 +       "vmla.f32               q12, q9, d0[1]                  \n\t"   //q12 += q9 * d0[1]
26 +       "vmla.f32               q13, q9, d2[1]              \n\t"       //q13 += q9 * d2[1]
27 +       "vmla.f32               q14, q9, d4[1]              \n\t"       //q14 += q9 * d4[1]
28 +       "vmla.f32               q15, q9, d6[1]              \n\t"       //q15 += q9 * d6[1]
29 +       "vmla.f32               q12, q10, d1[0]                 \n\t"   //q12 += q10 * d1[0]
30 +       "vmla.f32               q13, q10, d3[0]                 \n\t"   //q13 += q10 * d3[0]
31 +       "vmla.f32               q14, q10, d5[0]                 \n\t"   //q14 += q10 * d5[0]
32 +       "vmla.f32               q15, q10, d7[0]                 \n\t"   //q15 += q10 * d7[0]
33 +       "vmla.f32               q12, q11, d1[1]                 \n\t"   //q12 += q11 * d1[1]
34 +       "vmla.f32               q13, q11, d3[1]                 \n\t"   //q13 += q11 * d3[1]
35 +       "vmla.f32               q14, q11, d5[1]                 \n\t"   //q14 += q11 * d5[1]
36 +       "vmla.f32               q15, q11, d7[1]             \n\t"       //q15 += q11 * d7[1]
37 +
38 +       "vst1.32                {d24, d25}, [%2]!               \n\t"   //dest row 0 = q12
39 +       "vst1.32                {d26, d27}, [%2]!           \n\t"       //dest row 1 = q13
40 +       "vst1.32                {d28, d29}, [%2]!           \n\t"       //dest row 2 = q14
41 +       "vst1.32                {d30, d31}, [%2]            \n\t"       //dest row 3 = q15
42 +
43 +       :"+r"(m0), "+r"(m1), "+r"(dest):	// "+r": the post-increment addressing modifies the pointer registers
44 +    : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
45 +    "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
46 +    "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31",
47 +    "memory"
48 +       );
49 +}
50 +
51 +void Normalize_neon(float v[3])	// Scales v in place by 1/sqrt(x*x+y*y+z*z); there is no check for a zero-length vector
52 +{
53 +       asm volatile (
54 +       "vld1.32                {d4}, [%0]!                     \n\t"   //d4={x,y}
55 +       "flds                   s10, [%0]               \n\t"   //d5[0] = z
56 +       "sub                    %0, %0, #8              \n\t"   //rewind %0 back to &v[0]
57 +       "vmul.f32               d0, d4, d4                              \n\t"   //d0 = {x*x, y*y}
58 +       "vpadd.f32              d0, d0, d0                              \n\t"   //d0 = d0[0] + d0[1] (both lanes)
59 +    "vmla.f32          d0, d5, d5                              \n\t"   //d0 += d5*d5 (only lane 0 is used afterwards)
60 +
61 +       "vmov.f32               d1, d0                                  \n\t"   //d1 = d0 (squared length)
62 +       "vrsqrte.f32    d0, d0                                  \n\t"   //d0 = ~ 1.0 / sqrt(d0)
63 +       "vmul.f32               d2, d0, d1                              \n\t"   //d2 = d0 * d1
64 +       "vrsqrts.f32    d3, d2, d0                              \n\t"   //d3 = (3 - d0 * d2) / 2
65 +       "vmul.f32               d0, d0, d3                              \n\t"   //d0 = d0 * d3 (first refinement step)
66 +       "vmul.f32               d2, d0, d1                              \n\t"   //d2 = d0 * d1
67 +       "vrsqrts.f32    d3, d2, d0                              \n\t"   //d3 = (3 - d0 * d2) / 2
68 +       "vmul.f32               d0, d0, d3                              \n\t"   //d0 = d0 * d3 (second refinement step)
69 +
70 +       "vmul.f32               q2, q2, d0[0]                   \n\t"   //q2 (d4,d5) = q2 * d0[0]
71 +       "vst1.32                {d4}, [%0]!                     \n\t"   //v[0], v[1] = d4
72 +       "fsts                   s10, [%0]                       \n\t"   //v[2] = d5[0]
73 +
74 +       :"+r"(v) :
75 +    : "d0", "d1", "d2", "d3", "d4", "d5", "memory"
76 +       );
77 +}
78 +
79 +float DotProduct_neon( float v0[3], float v1[3] )	// Returns v0[0]*v1[0] + v0[1]*v1[1] + v0[2]*v1[2]
80 +{
81 +    float dot;
82 +       asm volatile (
83 +       "vld1.32                {d8}, [%1]!                     \n\t"   //d8={x0,y0}
84 +       "vld1.32                {d10}, [%2]!            \n\t"   //d10={x1,y1}
85 +       "flds                   s18, [%1, #0]       \n\t"       //d9[0]={z0}
86 +       "flds                   s22, [%2, #0]       \n\t"       //d11[0]={z1}
87 +       "vmul.f32               d12, d8, d10            \n\t"   //d12 = {x0*x1, y0*y1}
88 +       "vpadd.f32              d12, d12, d12           \n\t"   //d12 = d12[0] + d12[1] (both lanes)
89 +       "vmla.f32               d12, d9, d11            \n\t"   //d12 += d9*d11 (only lane 0 is used)
90 +    "fmrs              %0, s24                 \n\t"   //%0 = s24 (d12[0])
91 +       : "=r"(dot), "+r"(v0), "+r"(v1):
92 +    : "d8", "d9", "d10", "d11", "d12", "memory"	// "memory": the asm reads *v0 and *v1, which are not listed as memory operands
93 +
94 +       );
95 +    return dot;
96 +}
97 +
98 +#endif
99 +
100  // 2008.03.29 H.Morii - added SSE 3DNOW! 3x3 1x3 matrix multiplication
101  //                      and 3DNOW! 4x4 4x4 matrix multiplication
102  // 2011-01-03 Balrog - removed because is in NASM format and not 64-bit compatible
103  // This will need fixing.
104 +#ifndef __ARM_NEON__
105  MULMATRIX MulMatrices = MulMatricesC;
106  TRANSFORMVECTOR TransformVector = TransformVectorC;
107  TRANSFORMVECTOR InverseTransformVector = InverseTransformVectorC;
108  DOTPRODUCT DotProduct = DotProductC;
109  NORMALIZEVECTOR NormalizeVector = NormalizeVectorC;
110 +#endif
111  
112  void MulMatricesSSE(float m1[4][4],float m2[4][4],float r[4][4])
113  {
114 @@ -361,6 +455,7 @@
115  
116    void math_init()
117    {
118 +#ifndef __ARM_NEON__
119  #ifndef _DEBUG
120      int IsSSE = FALSE;
121  #if defined(__GNUC__) && !defined(NO_ASM) && !defined(NOSSE)
122 @@ -429,4 +524,5 @@
123        }
124  
125  #endif //_DEBUG
126 +#endif //__ARM_NEON__
127      }
128 diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/3dmath.h ./Glide64/3dmath.h
129 --- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/3dmath.h        2013-09-06 22:05:28.000000000 +0200
130 +++ ./Glide64/3dmath.h  2013-09-14 19:01:12.000000000 +0200
131 @@ -42,7 +42,22 @@
132  void calc_sphere (VERTEX *v);
133  
134  void math_init();
135 +#ifdef __ARM_NEON__
136 +float DotProductC(register float *v1, register float *v2);
137 +void NormalizeVectorC(float *v);
138 +void TransformVectorC(float *src, float *dst, float mat[4][4]);
139 +void InverseTransformVectorC (float *src, float *dst, float mat[4][4]);
140 +void MulMatricesC(float m1[4][4],float m2[4][4],float r[4][4]);
141 +void MultMatrix_neon( float m0[4][4], float m1[4][4], float dest[4][4]);
142 +void Normalize_neon(float v[3]);
143 +float DotProduct_neon( float v0[3], float v1[3] );
144  
145 +#define MulMatrices                            MulMatricesC            //MultMatrix_neon
146 +#define TransformVector                        TransformVectorC
147 +#define InverseTransformVector InverseTransformVectorC
148 +#define DotProduct                             DotProductC                     //DotProduct_neon
149 +#define NormalizeVector                        NormalizeVectorC        //Normalize_neon
150 +#else
151  typedef void (*MULMATRIX)(float m1[4][4],float m2[4][4],float r[4][4]); 
152  extern MULMATRIX MulMatrices;
153  typedef void (*TRANSFORMVECTOR)(float *src,float *dst,float mat[4][4]); 
154 @@ -52,3 +67,4 @@
155  extern DOTPRODUCT DotProduct;
156  typedef void (*NORMALIZEVECTOR)(float *v);
157  extern NORMALIZEVECTOR NormalizeVector;
158 +#endif
159 diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/3dmathneon.cpp ./Glide64/3dmathneon.cpp
160 --- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/3dmathneon.cpp  1970-01-01 01:00:00.000000000 +0100
161 +++ ./Glide64/3dmathneon.cpp    2013-09-13 23:05:47.000000000 +0200
162 @@ -0,0 +1,133 @@
163 +#include "3dmath.h"
164 +
165 +static void MultMatrix_neon( float m0[4][4], float m1[4][4], float dest[4][4])	// dest = m1 * m0 for row-major 4x4 float matrices (dest row i = sum over k of m1[i][k] * m0 row k)
166 +{
167 +    asm volatile (
168 +       "vld1.32                {d0, d1}, [%1]!                 \n\t"   //q0 = m1 row 0
169 +       "vld1.32                {d2, d3}, [%1]!         \n\t"   //q1 = m1 row 1
170 +       "vld1.32                {d4, d5}, [%1]!         \n\t"   //q2 = m1 row 2
171 +       "vld1.32                {d6, d7}, [%1]          \n\t"   //q3 = m1 row 3
172 +       "vld1.32                {d16, d17}, [%0]!               \n\t"   //q8 = m0 row 0
173 +       "vld1.32                {d18, d19}, [%0]!       \n\t"   //q9 = m0 row 1
174 +       "vld1.32                {d20, d21}, [%0]!       \n\t"   //q10 = m0 row 2
175 +       "vld1.32                {d22, d23}, [%0]        \n\t"   //q11 = m0 row 3
176 +
177 +       "vmul.f32               q12, q8, d0[0]                  \n\t"   //q12 = q8 * d0[0]
178 +       "vmul.f32               q13, q8, d2[0]              \n\t"       //q13 = q8 * d2[0]
179 +       "vmul.f32               q14, q8, d4[0]              \n\t"       //q14 = q8 * d4[0]
180 +       "vmul.f32               q15, q8, d6[0]                  \n\t"   //q15 = q8 * d6[0]
181 +       "vmla.f32               q12, q9, d0[1]                  \n\t"   //q12 += q9 * d0[1]
182 +       "vmla.f32               q13, q9, d2[1]              \n\t"       //q13 += q9 * d2[1]
183 +       "vmla.f32               q14, q9, d4[1]              \n\t"       //q14 += q9 * d4[1]
184 +       "vmla.f32               q15, q9, d6[1]              \n\t"       //q15 += q9 * d6[1]
185 +       "vmla.f32               q12, q10, d1[0]                 \n\t"   //q12 += q10 * d1[0]
186 +       "vmla.f32               q13, q10, d3[0]                 \n\t"   //q13 += q10 * d3[0]
187 +       "vmla.f32               q14, q10, d5[0]                 \n\t"   //q14 += q10 * d5[0]
188 +       "vmla.f32               q15, q10, d7[0]                 \n\t"   //q15 += q10 * d7[0]
189 +       "vmla.f32               q12, q11, d1[1]                 \n\t"   //q12 += q11 * d1[1]
190 +       "vmla.f32               q13, q11, d3[1]                 \n\t"   //q13 += q11 * d3[1]
191 +       "vmla.f32               q14, q11, d5[1]                 \n\t"   //q14 += q11 * d5[1]
192 +       "vmla.f32               q15, q11, d7[1]             \n\t"       //q15 += q11 * d7[1]
193 +
194 +       "vst1.32                {d24, d25}, [%2]!               \n\t"   //dest row 0 = q12
195 +       "vst1.32                {d26, d27}, [%2]!           \n\t"       //dest row 1 = q13
196 +       "vst1.32                {d28, d29}, [%2]!           \n\t"       //dest row 2 = q14
197 +       "vst1.32                {d30, d31}, [%2]            \n\t"       //dest row 3 = q15
198 +
199 +       :"+r"(m0), "+r"(m1), "+r"(dest):	// "+r": the post-increment addressing modifies the pointer registers
200 +    : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
201 +    "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
202 +    "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31",
203 +    "memory"
204 +       );
205 +}
206 +
207 +static void TransformVectorNormalize_neon(float vec[3], float mtx[4][4])	// vec = normalize(vec[0]*mtx row 0 + vec[1]*mtx row 1 + vec[2]*mtx row 2), written back in place (x,y,z only)
208 +{
209 +       asm volatile (
210 +       "vld1.32                {d0}, [%1]                      \n\t"   //d0 = {vec[0], vec[1]}
211 +       "flds                   s2, [%1, #8]                    \n\t"   //d1[0] = vec[2]
212 +       "vld1.32                {d18, d19}, [%0]!               \n\t"   //q9 = mtx row 0
213 +       "vld1.32                {d20, d21}, [%0]!           \n\t"       //q10 = mtx row 1
214 +       "vld1.32                {d22, d23}, [%0]            \n\t"       //q11 = mtx row 2
215 +
216 +       "vmul.f32               q2, q9, d0[0]                   \n\t"   //q2 = q9 * vec[0]
217 +       "vmla.f32               q2, q10, d0[1]                  \n\t"   //q2 += q10 * vec[1]
218 +       "vmla.f32               q2, q11, d1[0]                  \n\t"   //q2 += q11 * vec[2]
219 +
220 +    "vmul.f32          d0, d4, d4                              \n\t"   //d0 = {x*x, y*y} of the transformed vector
221 +       "vpadd.f32              d0, d0, d0                              \n\t"   //d0 = d0[0] + d0[1] (both lanes)
222 +    "vmla.f32          d0, d5, d5                              \n\t"   //d0 += d5*d5 (only lane 0 is used afterwards)
223 +
224 +       "vmov.f32               d1, d0                                  \n\t"   //d1 = d0 (squared length)
225 +       "vrsqrte.f32    d0, d0                                  \n\t"   //d0 = ~ 1.0 / sqrt(d0)
226 +       "vmul.f32               d2, d0, d1                              \n\t"   //d2 = d0 * d1
227 +       "vrsqrts.f32    d3, d2, d0                              \n\t"   //d3 = (3 - d0 * d2) / 2
228 +       "vmul.f32               d0, d0, d3                              \n\t"   //d0 = d0 * d3 (first refinement step)
229 +       "vmul.f32               d2, d0, d1                              \n\t"   //d2 = d0 * d1
230 +       "vrsqrts.f32    d3, d2, d0                              \n\t"   //d3 = (3 - d0 * d2) / 2
231 +       "vmul.f32               d0, d0, d3                              \n\t"   //d0 = d0 * d3 (second refinement step)
232 +
233 +       "vmul.f32               q2, q2, d0[0]                   \n\t"   //q2 (d4,d5) = q2 * d0[0]
234 +
235 +       "vst1.32                {d4}, [%1]                  \n\t"       //vec[0], vec[1] = d4
236 +       "fsts                   s10, [%1, #8]           \n\t"   //vec[2] = d5[0]
237 +       : "+r"(mtx): "r"(vec)
238 +    : "d0","d1","d2","d3","d4","d5","d18","d19","d20","d21","d22", "d23", "memory"	// d4/d5 (q2) are written above, so they must be declared clobbered
239 +       );
240 +}
241 +
242 +static void Normalize_neon(float v[3])	// Scales v in place by 1/sqrt(x*x+y*y+z*z); there is no check for a zero-length vector
243 +{
244 +       asm volatile (
245 +       "vld1.32                {d4}, [%0]!                     \n\t"   //d4={x,y}
246 +       "flds                   s10, [%0]               \n\t"   //d5[0] = z
247 +       "sub                    %0, %0, #8              \n\t"   //rewind %0 back to &v[0]
248 +       "vmul.f32               d0, d4, d4                              \n\t"   //d0 = {x*x, y*y}
249 +       "vpadd.f32              d0, d0, d0                              \n\t"   //d0 = d0[0] + d0[1] (both lanes)
250 +    "vmla.f32          d0, d5, d5                              \n\t"   //d0 += d5*d5 (only lane 0 is used afterwards)
251 +
252 +       "vmov.f32               d1, d0                                  \n\t"   //d1 = d0 (squared length)
253 +       "vrsqrte.f32    d0, d0                                  \n\t"   //d0 = ~ 1.0 / sqrt(d0)
254 +       "vmul.f32               d2, d0, d1                              \n\t"   //d2 = d0 * d1
255 +       "vrsqrts.f32    d3, d2, d0                              \n\t"   //d3 = (3 - d0 * d2) / 2
256 +       "vmul.f32               d0, d0, d3                              \n\t"   //d0 = d0 * d3 (first refinement step)
257 +       "vmul.f32               d2, d0, d1                              \n\t"   //d2 = d0 * d1
258 +       "vrsqrts.f32    d3, d2, d0                              \n\t"   //d3 = (3 - d0 * d2) / 2
259 +       "vmul.f32               d0, d0, d3                              \n\t"   //d0 = d0 * d3 (second refinement step)
260 +
261 +       "vmul.f32               q2, q2, d0[0]                   \n\t"   //q2 (d4,d5) = q2 * d0[0]
262 +       "vst1.32                {d4}, [%0]!                     \n\t"   //v[0], v[1] = d4
263 +       "fsts                   s10, [%0]                       \n\t"   //v[2] = d5[0]
264 +
265 +       :"+r"(v) :
266 +    : "d0", "d1", "d2", "d3", "d4", "d5", "memory"
267 +       );
268 +}
269 +
270 +static float DotProduct_neon( float v0[3], float v1[3] )	// Returns v0[0]*v1[0] + v0[1]*v1[1] + v0[2]*v1[2]
271 +{
272 +    float dot;
273 +       asm volatile (
274 +       "vld1.32                {d8}, [%1]!                     \n\t"   //d8={x0,y0}
275 +       "vld1.32                {d10}, [%2]!            \n\t"   //d10={x1,y1}
276 +       "flds                   s18, [%1, #0]       \n\t"       //d9[0]={z0}
277 +       "flds                   s22, [%2, #0]       \n\t"       //d11[0]={z1}
278 +       "vmul.f32               d12, d8, d10            \n\t"   //d12 = {x0*x1, y0*y1}
279 +       "vpadd.f32              d12, d12, d12           \n\t"   //d12 = d12[0] + d12[1] (both lanes)
280 +       "vmla.f32               d12, d9, d11            \n\t"   //d12 += d9*d11 (only lane 0 is used)
281 +    "fmrs              %0, s24                 \n\t"   //%0 = s24 (d12[0])
282 +       : "=r"(dot), "+r"(v0), "+r"(v1):
283 +    : "d8", "d9", "d10", "d11", "d12", "memory"	// "memory": the asm reads *v0 and *v1, which are not listed as memory operands
284 +
285 +       );
286 +    return dot;
287 +}
288 +
289 +void MathInitNeon()	// Assigns the NEON routines defined in this file to MulMatrices, NormalizeVector and DotProduct
290 +{	// NOTE(review): 3dmath.h as patched #defines these three names to the *C functions when __ARM_NEON__ is set, so these assignments only compile against the function-pointer declarations -- confirm how this file is built
291 +    MulMatrices = MultMatrix_neon;
292 +    //TransformVectorNormalize = TransformVectorNormalize_neon;
293 +    NormalizeVector = Normalize_neon;
294 +    DotProduct = DotProduct_neon;
295 +}
296 diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/Config.cpp ./Glide64/Config.cpp
297 --- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/Config.cpp      2013-09-06 22:05:29.000000000 +0200
298 +++ ./Glide64/Config.cpp        2013-09-07 10:51:27.000000000 +0200
299 @@ -89,7 +89,7 @@
300    { 640, 480 },
301    { 800, 600 },
302    { 960, 720 },
303 -  { 856, 480 },
304 +  { 800, 480 },
305    { 512, 256 },
306    { 1024, 768 },
307    { 1280, 1024 },
308 diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/CRC.cpp ./Glide64/CRC.cpp
309 --- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/CRC.cpp 2013-09-06 22:05:28.000000000 +0200
310 +++ ./Glide64/CRC.cpp   2013-09-08 13:12:00.000000000 +0200
311 @@ -43,6 +43,7 @@
312  //
313  //****************************************************************
314  //*
315 +
316  #define CRC32_POLYNOMIAL     0x04C11DB7
317  
318  unsigned int CRCTable[ 256 ];
319 @@ -140,3 +141,4 @@
320     return Crc32;
321  }
322  //*/
323 +
324 diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/FBtoScreen.cpp ./Glide64/FBtoScreen.cpp
325 --- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/FBtoScreen.cpp  2013-09-06 22:05:29.000000000 +0200
326 +++ ./Glide64/FBtoScreen.cpp    2013-09-08 11:57:33.000000000 +0200
327 @@ -165,12 +165,15 @@
328      for (wxUint32 w = 0; w < 256; w++)
329      {
330        col = *(src++);
331 -      r = (wxUint8)((col >> 24)&0xFF);
332 +      r = (wxUint8)((col >> (24+3))&0x1F);
333 +      g = (wxUint8)((col >> (16+2))&0x3F);
334 +      b = (wxUint8)((col >>  (8+3))&0x1F);
335 +/*      r = (wxUint8)((col >> 24)&0xFF);
336        r = (wxUint8)((float)r / 255.0f * 31.0f);
337        g = (wxUint8)((col >> 16)&0xFF);
338        g = (wxUint8)((float)g / 255.0f * 63.0f);
339        b = (wxUint8)((col >>  8)&0xFF);
340 -      b = (wxUint8)((float)b / 255.0f * 31.0f);
341 +      b = (wxUint8)((float)b / 255.0f * 31.0f);*/      //*SEB*
342        *(dst++) = (r << 11) | (g << 5) | b;
343      }
344      src += (fb_info.width - 256);
345 @@ -261,12 +264,15 @@
346              if (idx >= bound)
347                break;
348              c32 = src32[idx];
349 -            r = (wxUint8)((c32 >> 24)&0xFF);
350 +            r = (wxUint8)((c32 >> (24+3))&0x1F);
351 +            g = (wxUint8)((c32 >> (16+3))&0x1F);	// 5-bit green: the 1-5-5-5 packing below leaves only bits 5..9 for g, a 6-bit value would spill into red
352 +            b = (wxUint8)((c32 >>  (8+3))&0x1F);
353 +/*            r = (wxUint8)((c32 >> 24)&0xFF);
354              r = (wxUint8)((float)r / 255.0f * 31.0f);
355              g = (wxUint8)((c32 >> 16)&0xFF);
356              g = (wxUint8)((float)g / 255.0f * 63.0f);
357              b = (wxUint8)((c32 >>  8)&0xFF);
358 -            b = (wxUint8)((float)b / 255.0f * 31.0f);
359 +            b = (wxUint8)((float)b / 255.0f * 31.0f);*/        //*SEB*
360              a = (c32&0xFF) ? 1 : 0;
361              *(dst++) = (a<<15) | (r << 10) | (g << 5) | b;
362            }
363 diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/Gfx_1.3.h ./Glide64/Gfx_1.3.h
364 --- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/Gfx_1.3.h       2013-09-06 22:05:29.000000000 +0200
365 +++ ./Glide64/Gfx_1.3.h 2013-09-08 16:22:57.000000000 +0200
366 @@ -106,6 +106,8 @@
367  // ** TAKE OUT BEFORE RELEASE!!! **
368  //#define LOGGING                      // log of spec functions called
369  //#define LOG_KEY                      // says "Key!!!" in the log when space bar is pressed
370 +//#define EXT_LOGGING
371 +//#define PERFORMANCE
372  
373  //#define LOG_UCODE
374  
375 @@ -120,15 +122,15 @@
376  
377  #define FPS                                    // fps counter able? (not enabled necessarily)
378  
379 -#define LOGNOTKEY                       // Log if not pressing:
380 -#define LOGKEY         0x11 // this key (CONTROL)
381 +//#define LOGNOTKEY                     // Log if not pressing:
382 +//#define LOGKEY               0x11 // this key (CONTROL)
383  
384  //#define LOG_COMMANDS         // log the whole 64-bit command as (0x........, 0x........)
385  
386  #define CATCH_EXCEPTIONS       // catch exceptions so it doesn't freeze and will report
387                                                         // "The gfx plugin has caused an exception" instead.
388  
389 -#define FLUSH                          // flush the file buffer. slower logging, but makes sure
390 +//#define FLUSH                                // flush the file buffer. slower logging, but makes sure
391                                                         //  the command is logged before continuing (in case of
392                                                         //  crash or exception, the log will not be cut short)
393  #ifndef _ENDUSER_RELEASE_
394 @@ -144,7 +146,7 @@
395  
396  
397  // Usually enabled
398 -#define LARGE_TEXTURE_HANDLING // allow large-textured objects to be split?
399 +//#define LARGE_TEXTURE_HANDLING       // allow large-textured objects to be split?
400  
401  #ifdef ALTTAB_FIX
402  extern HHOOK hhkLowLevelKybd;
403 @@ -189,7 +191,6 @@
404  
405  int CheckKeyPressed(int key, int mask);
406  
407 -//#define PERFORMANCE
408  #ifdef PERFORMANCE
409  extern int64 perf_cur;
410  extern int64 perf_next;
411 diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/Main.cpp ./Glide64/Main.cpp
412 --- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/Main.cpp        2013-09-06 22:05:29.000000000 +0200
413 +++ ./Glide64/Main.cpp  2013-09-15 17:06:29.000000000 +0200
414 @@ -170,7 +170,7 @@
415  // 60=0x0, 70=0x1, 72=0x2, 75=0x3, 80=0x4, 90=0x5, 100=0x6, 85=0x7, 120=0x8, none=0xff
416  
417  #ifdef PAULSCODE
418 -#include "ae_bridge.h"
419 +//#include "ae_bridge.h"
420  #include "FrameSkipper.h"
421  FrameSkipper frameSkipper;
422  #endif
423 @@ -1768,12 +1768,13 @@
424  EXPORT void CALL RomClosed (void)
425  {
426    VLOG ("RomClosed ()\n");
427 +printf("RomClosed ()\n");
428  
429    CLOSE_RDP_LOG ();
430    CLOSE_RDP_E_LOG ();
431    rdp.window_changed = TRUE;
432    romopen = FALSE;
433 -  if (fullscreen && evoodoo)
434 +//  if (fullscreen && evoodoo)//*SEB*
435      ReleaseGfx ();
436  }
437  
438 @@ -1973,9 +1974,6 @@
439  wxUint32 update_screen_count = 0;
440  EXPORT void CALL UpdateScreen (void)
441  {
442 -#ifdef PAULSCODE
443 -  frameSkipper.update();
444 -#endif
445  #ifdef LOG_KEY
446    if (CheckKeyPressed(G64_VK_SPACE, 0x0001))
447    {
448 @@ -2020,6 +2018,9 @@
449      no_dlist = true;
450      ClearCache ();
451      UpdateScreen();
452 +#ifdef PAULSCODE
453 +  frameSkipper.update();
454 +#endif
455      return;
456    }
457    //*/
458 @@ -2035,11 +2036,17 @@
459        rdp.updatescreen = 1;
460        newSwapBuffers ();
461      }
462 +#ifdef PAULSCODE
463 +  frameSkipper.update();
464 +#endif
465      return;
466    }
467    //*/
468    if (settings.swapmode == 0)
469      newSwapBuffers ();
470 +#ifdef PAULSCODE
471 +  frameSkipper.update();
472 +#endif
473  }
474  
475  static void DrawWholeFrameBufferToScreen()
476 diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/rdp.cpp ./Glide64/rdp.cpp
477 --- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/rdp.cpp 2013-09-06 22:05:29.000000000 +0200
478 +++ ./Glide64/rdp.cpp   2013-09-13 22:23:52.000000000 +0200
479 @@ -56,6 +56,10 @@
480  extern FrameSkipper frameSkipper;
481  #endif
482  
483 +#ifdef PERFORMANCE
484 +#include "ticks.h"
485 +#endif
486 +
487  /*
488  const int NumOfFormats = 3;
489  SCREEN_SHOT_FORMAT ScreenShotFormats[NumOfFormats] = { {wxT("BMP"), wxT("bmp"), wxBITMAP_TYPE_BMP}, {wxT("PNG"), wxT("png"), wxBITMAP_TYPE_PNG}, {wxT("JPEG"), wxT("jpeg"), wxBITMAP_TYPE_JPEG} };
490 @@ -633,18 +637,21 @@
491  
492  EXPORT void CALL ProcessDList(void)
493  {
494 -  SoftLocker lock(mutexProcessDList);
495 +//  SoftLocker lock(mutexProcessDList);
496  #ifdef PAULSCODE
497 -  if (frameSkipper.willSkipNext() || !lock.IsOk()) //mutex is busy
498 +  if (frameSkipper.willSkipNext() /*|| !lock.IsOk()*/) //mutex is busy
499  #else
500 -  if (!lock.IsOk()) //mutex is busy
501 +  if (/*!lock.IsOk()*/0) //mutex is busy
502  #endif
503    {
504 +// printf("Frameskip, reason=%s\n", (lock.IsOk())?"lock":"frameskip");
505      if (!fullscreen)
506        drawNoFullscreenMessage();
507      // Set an interrupt to allow the game to continue
508      *gfx.MI_INTR_REG |= 0x20;
509      gfx.CheckInterrupts();
510 +       *gfx.MI_INTR_REG |= 0x01;
511 +       gfx.CheckInterrupts();
512      return;
513    }
514  
515 @@ -717,7 +724,18 @@
516      unimp.close();
517    }
518  #endif
519 -
520 +/*
521 +#ifdef PAULSCODE
522 +  if (frameSkipper.willSkipNext())
523 +  {
524 +       *gfx.MI_INTR_REG |= 0x20;
525 +       gfx.CheckInterrupts();
526 +       *gfx.MI_INTR_REG |= 0x01;
527 +       gfx.CheckInterrupts();
528 +       return;
529 +  }
530 +#endif
531 +*/
532    //* Set states *//
533    if (settings.swapmode > 0)
534      SwapOK = TRUE;
535 @@ -818,7 +836,7 @@
536          rdp.pc[rdp.pc_i] = (a+8) & BMASK;
537  
538  #ifdef PERFORMANCE
539 -        perf_cur = wxDateTime::UNow();
540 +        perf_cur = ticksGetTicks();
541  #endif
542          // Process this instruction
543          gfx_instruction[settings.ucode][rdp.cmd0>>24] ();
544 @@ -837,9 +855,13 @@
545          }
546  
547  #ifdef PERFORMANCE
548 -        perf_next = wxDateTime::UNow();
549 -        sprintf (out_buf, "perf %08lx: %016I64d\n", a-8, (perf_next-perf_cur).Format(_T("%l")).mb_str());
550 +        perf_next = ticksGetTicks();
551 +        sprintf (out_buf, "perf %08x: %lli\n", a-8, (perf_next-perf_cur));
552 +#ifdef RDP_LOGGING
553          rdp_log << out_buf;
554 +#else
555 +               printf("%s", out_buf);	// never pass a runtime buffer as the format string
556 +#endif
557  #endif
558  
559        } while (!rdp.halt);
560 diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/Util.cpp ./Glide64/Util.cpp
561 --- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/Util.cpp        2013-09-06 22:05:29.000000000 +0200
562 +++ ./Glide64/Util.cpp  2013-09-08 12:39:52.000000000 +0200
563 @@ -289,29 +289,29 @@
564    deltaZ = dzdx = 0;
565    if (linew == 0 && (fb_depth_render_enabled || (rdp.rm & 0xC00) == 0xC00))
566    {
567 -    double X0 = vtx[0]->sx / rdp.scale_x;
568 -    double Y0 = vtx[0]->sy / rdp.scale_y;
569 -    double X1 = vtx[1]->sx / rdp.scale_x;
570 -    double Y1 = vtx[1]->sy / rdp.scale_y;
571 -    double X2 = vtx[2]->sx / rdp.scale_x;
572 -    double Y2 = vtx[2]->sy / rdp.scale_y;
573 -    double diffy_02 = Y0 - Y2;
574 -    double diffy_12 = Y1 - Y2;
575 -    double diffx_02 = X0 - X2;
576 -    double diffx_12 = X1 - X2;
577 -
578 -    double denom = (diffx_02 * diffy_12 - diffx_12 * diffy_02);
579 -    if(denom*denom > 0.0)
580 -    {
581 -      double diffz_02 = vtx[0]->sz - vtx[2]->sz;
582 -      double diffz_12 = vtx[1]->sz - vtx[2]->sz;
583 -      double fdzdx = (diffz_02 * diffy_12 - diffz_12 * diffy_02) / denom;
584 +    float X0 = vtx[0]->sx / rdp.scale_x;
585 +    float Y0 = vtx[0]->sy / rdp.scale_y;
586 +    float X1 = vtx[1]->sx / rdp.scale_x;
587 +    float Y1 = vtx[1]->sy / rdp.scale_y;
588 +    float X2 = vtx[2]->sx / rdp.scale_x;
589 +    float Y2 = vtx[2]->sy / rdp.scale_y;
590 +    float diffy_02 = Y0 - Y2;
591 +    float diffy_12 = Y1 - Y2;
592 +    float diffx_02 = X0 - X2;
593 +    float diffx_12 = X1 - X2;
594 +
595 +    float denom = (diffx_02 * diffy_12 - diffx_12 * diffy_02);
596 +    if(denom*denom > 0.0f)
597 +    {
598 +      float diffz_02 = vtx[0]->sz - vtx[2]->sz;
599 +      float diffz_12 = vtx[1]->sz - vtx[2]->sz;
600 +      float fdzdx = (diffz_02 * diffy_12 - diffz_12 * diffy_02) / denom;
601        if ((rdp.rm & 0xC00) == 0xC00) {
602          // Calculate deltaZ per polygon for Decal z-mode
603 -        double fdzdy = (diffz_02 * diffx_12 - diffz_12 * diffx_02) / denom;
604 -        double fdz = fabs(fdzdx) + fabs(fdzdy);
605 +        float fdzdy = (diffz_02 * diffx_12 - diffz_12 * diffx_02) / denom;
606 +        float fdz = fabs(fdzdx) + fabs(fdzdy);
607          if ((settings.hacks & hack_Zelda) && (rdp.rm & 0x800))
608 -          fdz *= 4.0;  // Decal mode in Zelda sometimes needs mutiplied deltaZ to work correct, e.g. roads
609 +          fdz *= 4.0f;  // Decal mode in Zelda sometimes needs mutiplied deltaZ to work correct, e.g. roads
610          deltaZ = max(8, (int)fdz);
611        }
612        dzdx = (int)(fdzdx * 65536.0);
613 @@ -881,12 +881,12 @@
614  //*/
615  
616  typedef struct {
617 -  double d;
618 -  double x;
619 -  double y;
620 +  float d;             //*SEB* was doubles
621 +  float x;
622 +  float y;
623  } LineEuqationType;
624  
625 -static double EvaLine(LineEuqationType &li, double x, double y)
626 +static float EvaLine(LineEuqationType &li, float x, float y)   //*SEB* all double before
627  {
628    return li.x*x+li.y*y+li.d;
629  }
630 @@ -906,7 +906,7 @@
631  }
632  
633  
634 -__inline double interp3p(float a, float b, float c, double r1, double r2)
635 +__inline float interp3p(float a, float b, float c, float r1, float r2) //*SEB* r1 and r2 and function was double
636  {
637    return (a)+(((b)+((c)-(b))*(r2))-(a))*(r1);
638  }
639 @@ -915,34 +915,34 @@
640    (a+(((b)+((c)-(b))*(r2))-(a))*(r1))
641  */
642  
643 -static void InterpolateColors3(VERTEX &v1, VERTEX &v2, VERTEX &v3, VERTEX &out)
644 +static void InterpolateColors3(VERTEX &v1, VERTEX &v2, VERTEX &v3, VERTEX &out)        //*SEB* all double before
645  {
646  
647    LineEuqationType line;
648    Create1LineEq(line, v2, v3, v1);
649  
650 -  double aDot = (out.x*line.x + out.y*line.y);
651 -  double bDot = (v1.sx*line.x + v1.sy*line.y);
652 +  float aDot = (out.x*line.x + out.y*line.y);
653 +  float bDot = (v1.sx*line.x + v1.sy*line.y);
654  
655 -  double scale1 = ( - line.d - aDot) / ( bDot - aDot );
656 +  float scale1 = ( - line.d - aDot) / ( bDot - aDot );
657  
658 -  double tx = out.x + scale1 * (v1.sx - out.x);
659 -  double ty = out.y + scale1 * (v1.sy - out.y);
660 +  float tx = out.x + scale1 * (v1.sx - out.x);
661 +  float ty = out.y + scale1 * (v1.sy - out.y);
662  
663 -  double s1 = 101.0, s2 = 101.0;
664 -  double den = tx - v1.sx;
665 -  if (fabs(den) > 1.0)
666 +  float s1 = 101.0, s2 = 101.0;
667 +  float den = tx - v1.sx;
668 +  if (fabsf(den) > 1.0)
669      s1 = (out.x-v1.sx)/den;
670    if (s1 > 100.0f)
671      s1 = (out.y-v1.sy)/(ty-v1.sy);
672  
673    den = v3.sx - v2.sx;
674 -  if (fabs(den) > 1.0)
675 +  if (fabsf(den) > 1.0)
676      s2 = (tx-v2.sx)/den;
677    if (s2 > 100.0f)
678      s2 =(ty-v2.sy)/(v3.sy-v2.sy);
679  
680 -  double w = 1.0/interp3p(v1.oow,v2.oow,v3.oow,s1,s2);
681 +  float w = 1.0/interp3p(v1.oow,v2.oow,v3.oow,s1,s2);
682  
683    out.r = real_to_char(interp3p(v1.r*v1.oow,v2.r*v2.oow,v3.r*v3.oow,s1,s2)*w);
684    out.g = real_to_char(interp3p(v1.g*v1.oow,v2.g*v2.oow,v3.g*v3.oow,s1,s2)*w);
685 @@ -976,8 +976,8 @@
686    */
687    float deltaS, deltaT;
688    float deltaX, deltaY;
689 -  double deltaTexels, deltaPixels, lodFactor = 0;
690 -  double intptr;
691 +  float deltaTexels, deltaPixels, lodFactor = 0;       //*SEB* double before
692 +  float intptr;                                                                                //*SEB* double before
693    float s_scale = rdp.tiles[rdp.cur_tile].width / 255.0f;
694    float t_scale = rdp.tiles[rdp.cur_tile].height / 255.0f;
695    if (settings.lodmode == 1)
696 @@ -1019,7 +1019,7 @@
697    float lod_fraction = 1.0f;
698    if (lod_tile < rdp.cur_tile + rdp.mipmap_level)
699    {
700 -       lod_fraction = max((float)modf(lodFactor / pow(2.,lod_tile),&intptr), rdp.prim_lodmin / 255.0f);
701 +       lod_fraction = max((float)modff(lodFactor / powf(2.,lod_tile),&intptr), (float)rdp.prim_lodmin / 255.0f);
702    }
703    float detailmax;
704    if (cmb.dc0_detailmax < 0.5f)
705 diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/GlideHQ/TxDbg.cpp ./GlideHQ/TxDbg.cpp
706 --- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/GlideHQ/TxDbg.cpp       2013-09-06 22:05:30.000000000 +0200
707 +++ ./GlideHQ/TxDbg.cpp 2013-09-07 12:06:11.000000000 +0200
708 @@ -28,6 +28,8 @@
709  #include <stdarg.h>
710  #include <string>
711  
712 +#define _GLIBCXX_HAVE_BROKEN_VSWPRINTF 1
713 +
714  TxDbg::TxDbg()
715  {
716    _level = DBG_LEVEL;
717 diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glitch64/combiner.cpp ./Glitch64/combiner.cpp
718 --- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glitch64/combiner.cpp   2013-09-06 22:05:30.000000000 +0200
719 +++ ./Glitch64/combiner.cpp     2013-09-14 10:16:36.000000000 +0200
720 @@ -29,6 +29,8 @@
721  #include "glide.h"
722  #include "main.h"
723  
724 +#define GLchar char
725 +
726  void vbo_draw();
727  
728  static int fct[4], source0[4], operand0[4], source1[4], operand1[4], source2[4], operand2[4];
729 @@ -117,10 +119,11 @@
730  // using gl_FragCoord is terribly slow on ATI and varying variables don't work for some unknown
731  // reason, so we use the unused components of the texture2 coordinates
732  static const char* fragment_shader_dither =
733 -"  float dithx = (gl_TexCoord[2].b + 1.0)*0.5*1000.0; \n"
734 +" \n"
735 +/*"  float dithx = (gl_TexCoord[2].b + 1.0)*0.5*1000.0; \n"
736  "  float dithy = (gl_TexCoord[2].a + 1.0)*0.5*1000.0; \n"
737  "  if(texture2D(ditherTex, vec2((dithx-32.0*floor(dithx/32.0))/32.0, \n"
738 -"                               (dithy-32.0*floor(dithy/32.0))/32.0)).a > 0.5) discard; \n"
739 +"                               (dithy-32.0*floor(dithy/32.0))/32.0)).a > 0.5) discard; \n"*/
740  ;
741  
742  static const char* fragment_shader_default =
743 @@ -165,11 +168,16 @@
744  "}                               \n"
745  ;
746  
747 +static const char* fragment_shader_alt_end =
748 +"                                \n"
749 +"}                               \n"
750 +;
751 +
752  static const char* vertex_shader =
753  SHADER_HEADER
754  "#define Z_MAX 65536.0                                          \n"
755  "attribute highp vec4 aVertex;                                  \n"
756 -"attribute highp vec4 aColor;                                   \n"
757 +"attribute mediump vec4 aColor;                                   \n"  //*SEB* highp -> mediump
758  "attribute highp vec4 aMultiTexCoord0;                          \n"
759  "attribute highp vec4 aMultiTexCoord1;                          \n"
760  "attribute float aFog;                                          \n"
761 @@ -245,7 +253,7 @@
762  
763    // creating a fake texture
764    glBindTexture(GL_TEXTURE_2D, default_texture);
765 -  glTexImage2D(GL_TEXTURE_2D, 0, 3, 2, 2, 0, GL_RGBA, GL_UNSIGNED_BYTE, texture);
766 +  glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 2, 2, 0, GL_RGBA, GL_UNSIGNED_BYTE, texture);
767    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
768    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
769  
770 @@ -286,7 +294,7 @@
771      strlen(fragment_shader_end)+1);
772    strcpy(fragment_shader, fragment_shader_header);
773    strcat(fragment_shader, fragment_shader_default);
774 -  strcat(fragment_shader, fragment_shader_end);
775 +  strcat(fragment_shader, fragment_shader_end);        /*SEB*/
776    glShaderSource(fragment_shader_object, 1, (const GLchar**)&fragment_shader, NULL);
777    free(fragment_shader);
778  
779 @@ -408,6 +416,7 @@
780    int dither_enabled;
781    int blackandwhite0;
782    int blackandwhite1;
783 +  int alpha_test;                      //*SEB*
784    GLuint fragment_shader_object;
785    GLuint program_object;
786    int texture0_location;
787 @@ -489,6 +498,8 @@
788    int i;
789    int chroma_color_location;
790    int log_length;
791 +  
792 +  int noalpha;
793  
794    need_to_compile = 0;
795  
796 @@ -502,6 +513,7 @@
797        prog.texture0_combinera == texture0_combinera_key &&
798        prog.texture1_combinera == texture1_combinera_key &&
799        prog.fog_enabled == fog_enabled &&
800 +         prog.alpha_test == alpha_test &&                              //*SEB*
801        prog.chroma_enabled == chroma_enabled &&
802        prog.dither_enabled == dither_enabled &&
803        prog.blackandwhite0 == blackandwhite0 &&
804 @@ -514,11 +526,13 @@
805      }
806    }
807  
808 -  if(shader_programs != NULL)
809 -    shader_programs = (shader_program_key*)realloc(shader_programs, (number_of_programs+1)*sizeof(shader_program_key));
810 +  if(shader_programs != NULL) {
811 +       if ((number_of_programs+1)>1024)
812 +               shader_programs = (shader_program_key*)realloc(shader_programs, (number_of_programs+1)*sizeof(shader_program_key));
813 +  }
814    else
815 -    shader_programs = (shader_program_key*)malloc(sizeof(shader_program_key));
816 -  //printf("number of shaders %d\n", number_of_programs);
817 +    shader_programs = (shader_program_key*)malloc(sizeof(shader_program_key)*1024);
818 +       //printf("number of shaders %d\n", number_of_programs);
819  
820    shader_programs[number_of_programs].color_combiner = color_combiner_key;
821    shader_programs[number_of_programs].alpha_combiner = alpha_combiner_key;
822 @@ -531,6 +545,7 @@
823    shader_programs[number_of_programs].dither_enabled = dither_enabled;
824    shader_programs[number_of_programs].blackandwhite0 = blackandwhite0;
825    shader_programs[number_of_programs].blackandwhite1 = blackandwhite1;
826 +  shader_programs[number_of_programs].alpha_test = alpha_test;         //*SEB*
827  
828    if(chroma_enabled)
829    {
830 @@ -557,7 +572,10 @@
831    strcat(fragment_shader, fragment_shader_color_combiner);
832    strcat(fragment_shader, fragment_shader_alpha_combiner);
833    if(fog_enabled) strcat(fragment_shader, fragment_shader_fog);
834 -  strcat(fragment_shader, fragment_shader_end);
835 +  if (alpha_test)
836 +               strcat(fragment_shader, fragment_shader_end);
837 +  else
838 +               strcat(fragment_shader, fragment_shader_alt_end);               //*SEB*
839    if(chroma_enabled) strcat(fragment_shader, fragment_shader_chroma);
840  
841    shader_programs[number_of_programs].fragment_shader_object = glCreateShader(GL_FRAGMENT_SHADER);
842 @@ -1719,7 +1737,7 @@
843    glActiveTexture(GL_TEXTURE2);
844    glEnable(GL_TEXTURE_2D);
845    glBindTexture(GL_TEXTURE_2D, 33*1024*1024);
846 -  glTexImage2D(GL_TEXTURE_2D, 0, 4, 32, 32, 0, GL_RGBA, GL_UNSIGNED_BYTE, texture);
847 +  glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 32, 32, 0, GL_RGBA, GL_UNSIGNED_BYTE, texture);
848    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
849    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
850    glDisable(GL_TEXTURE_2D);
851 diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glitch64/geometry.cpp ./Glitch64/geometry.cpp
852 --- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glitch64/geometry.cpp   2013-09-06 22:05:30.000000000 +0200
853 +++ ./Glitch64/geometry.cpp     2013-09-12 22:13:33.000000000 +0200
854 @@ -34,7 +34,7 @@
855  #define VERTEX_SIZE sizeof(VERTEX) //Size of vertex struct
856  
857  #ifdef PAULSCODE
858 -#include "ae_bridge.h"
859 +//#include "ae_bridge.h"
860  static float polygonOffsetFactor;
861  static float polygonOffsetUnits;
862  #endif
863 @@ -338,8 +338,11 @@
864  void FindBestDepthBias()
865  {
866  #ifdef PAULSCODE
867 -  int hardwareType = Android_JNI_GetHardwareType();
868 -  Android_JNI_GetPolygonOffset(hardwareType, 1, &polygonOffsetFactor, &polygonOffsetUnits);
869 +/*  int hardwareType = Android_JNI_GetHardwareType();
870 +  Android_JNI_GetPolygonOffset(hardwareType, 1, &polygonOffsetFactor, &polygonOffsetUnits);*/
871 +//  glPolygonOffset(0.2f, 0.2f);
872 +       polygonOffsetFactor=0.2f;
873 +       polygonOffsetUnits=0.2f;
874  #else
875    float f, bestz = 0.25f;
876    int x;
877 @@ -386,7 +389,11 @@
878    if (level)
879    {
880      #ifdef PAULSCODE
881 -    glPolygonOffset(polygonOffsetFactor, polygonOffsetUnits);
882 +//    glPolygonOffset(polygonOffsetFactor, polygonOffsetUnits);
883 +    if(w_buffer_mode)
884 +      glPolygonOffset(1.0f, -(float)level*polygonOffsetUnits);
885 +    else
886 +      glPolygonOffset(0, (float)level*3.0f);
887      #else
888      if(w_buffer_mode)
889        glPolygonOffset(1.0f, -(float)level*zscale/255.0f);
890 @@ -408,13 +415,13 @@
891  grDrawTriangle( const void *a, const void *b, const void *c )
892  {
893    LOG("grDrawTriangle()\r\n\t");
894 -  
895 +/*  
896    if(nvidia_viewport_hack && !render_to_texture)
897    {
898      glViewport(0, viewport_offset, viewport_width, viewport_height);
899      nvidia_viewport_hack = 0;
900    }
901 -
902 +*/
903    reloadTexture();
904  
905    if(need_to_compile) compile_shader();
906 @@ -588,13 +595,13 @@
907  {
908    void **pointers = (void**)pointers2;
909    LOG("grDrawVertexArray(%d,%d)\r\n", mode, Count);
910 -
911 +/*
912    if(nvidia_viewport_hack && !render_to_texture)
913    {
914      glViewport(0, viewport_offset, viewport_width, viewport_height);
915      nvidia_viewport_hack = 0;
916    }
917 -
918 +*/
919    reloadTexture();
920  
921    if(need_to_compile) compile_shader();
922 @@ -612,13 +619,13 @@
923  grDrawVertexArrayContiguous(FxU32 mode, FxU32 Count, void *pointers, FxU32 stride)
924  {
925    LOG("grDrawVertexArrayContiguous(%d,%d,%d)\r\n", mode, Count, stride);
926 -
927 +/*
928    if(nvidia_viewport_hack && !render_to_texture)
929    {
930      glViewport(0, viewport_offset, viewport_width, viewport_height);
931      nvidia_viewport_hack = 0;
932    }
933 -
934 +*/
935    if(stride != 156)
936    {
937           LOGINFO("Incompatible stride\n");
938 diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glitch64/glitchmain.cpp ./Glitch64/glitchmain.cpp
939 --- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glitch64/glitchmain.cpp 2013-09-06 22:05:30.000000000 +0200
940 +++ ./Glitch64/glitchmain.cpp   2013-09-15 17:13:49.000000000 +0200
941 @@ -656,6 +656,9 @@
942  #ifdef _WIN32
943    glCompressedTexImage2DARB = (PFNGLCOMPRESSEDTEXIMAGE2DPROC)wglGetProcAddress("glCompressedTexImage2DARB");
944  #endif
945 +/*SEB*/
946 +  glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
947 +  glPixelStorei(GL_PACK_ALIGNMENT, 1);
948  
949  
950  #ifdef _WIN32
951 @@ -806,6 +809,7 @@
952      fullscreen = 0;
953    }
954  #else
955 +  CoreVideo_Quit();
956    //SDL_QuitSubSystem(SDL_INIT_VIDEO);
957    //sleep(2);
958  #endif
959 @@ -823,7 +827,7 @@
960    int i;
961    static int fbs_init = 0;
962  
963 -  //printf("grTextureBufferExt(%d, %d, %d, %d, %d, %d, %d)\r\n", tmu, startAddress, lodmin, lodmax, aspect, fmt, evenOdd);
964 +       //printf("grTextureBufferExt(%d, %d, %d, %d, %d, %d, %d)\r\n", tmu, startAddress, lodmin, lodmax, aspect, fmt, evenOdd);
965    LOG("grTextureBufferExt(%d, %d, %d, %d %d, %d, %d)\r\n", tmu, startAddress, lodmin, lodmax, aspect, fmt, evenOdd);
966    if (lodmin != lodmax) display_warning("grTextureBufferExt : loading more than one LOD");
967    if (!use_fbo) {
968 @@ -907,8 +911,8 @@
969        tmu_usage[rtmu].min = pBufferAddress;
970      if ((unsigned int) tmu_usage[rtmu].max < pBufferAddress+size)
971        tmu_usage[rtmu].max = pBufferAddress+size;
972 -    //   printf("tmu %d usage now %gMb - %gMb\n",
973 -    //          rtmu, tmu_usage[rtmu].min/1024.0f, tmu_usage[rtmu].max/1024.0f);
974 +       //printf("tmu %d usage now %gMb - %gMb\n",
975 +    //      rtmu, tmu_usage[rtmu].min/1024.0f, tmu_usage[rtmu].max/1024.0f);
976  
977  
978      width = pBufferWidth;
979 @@ -927,14 +931,14 @@
980      texbufs[i].fmt = fmt;
981      if (i == texbuf_i)
982        texbuf_i = (texbuf_i+1)&(NB_TEXBUFS-1);
983 -    //printf("texbuf %x fmt %x\n", pBufferAddress, fmt);
984 +       //printf("texbuf %x fmt %x\n", pBufferAddress, fmt);
985  
986      // ZIGGY it speeds things up to not delete the buffers
987      // a better thing would be to delete them *sometimes*
988      //   remove_tex(pBufferAddress+1, pBufferAddress + size);
989      add_tex(pBufferAddress);
990  
991 -    //printf("viewport %dx%d\n", width, height);
992 +       //printf("viewport %dx%d\n", width, height);
993      if (height > screen_height) {
994        glViewport( 0, viewport_offset + screen_height - height, width, height);
995      } else
996 @@ -1009,7 +1013,6 @@
997          }
998        }
999      }
1000 -
1001      remove_tex(pBufferAddress, pBufferAddress + width*height*2/*grTexFormatSize(fmt)*/);
1002      //create new FBO
1003      glGenFramebuffers( 1, &(fbs[nb_fb].fbid) );
1004 @@ -1768,6 +1771,7 @@
1005            GrLfbInfo_t *info )
1006  {
1007    LOG("grLfbLock(%d,%d,%d,%d,%d)\r\n", type, buffer, writeMode, origin, pixelPipeline);
1008 +//printf("grLfbLock(%d,%d,%d,%d,%d)\r\n", type, buffer, writeMode, origin, pixelPipeline);
1009    if (type == GR_LFB_WRITE_ONLY)
1010    {
1011      display_warning("grLfbLock : write only");
1012 @@ -1792,12 +1796,32 @@
1013      if(buffer != GR_BUFFER_AUXBUFFER)
1014      {
1015        if (writeMode == GR_LFBWRITEMODE_888) {
1016 +/*SEB*/
1017 +        buf = (unsigned char*)malloc(width*height*4);
1018          //printf("LfbLock GR_LFBWRITEMODE_888\n");
1019          info->lfbPtr = frameBuffer;
1020          info->strideInBytes = width*4;
1021          info->writeMode = GR_LFBWRITEMODE_888;
1022          info->origin = origin;
1023          //glReadPixels(0, viewport_offset, width, height, GL_BGRA, GL_UNSIGNED_BYTE, frameBuffer);
1024 +        glReadPixels(0, viewport_offset, width, height, GL_RGBA, GL_UNSIGNED_BYTE, buf);
1025 +
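1026 +/*SEB* NOTE(review): f below is a 16-bit pointer, so the packed 32-bit value is truncated on store although strideInBytes is width*4 - confirm the intended pixel size */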
1027 +           unsigned char *p=buf;
1028 +        for (j=0; j<height; j++)
1029 +        {
1030 +           short unsigned int *f=frameBuffer+(height-j-1)*width;
1031 +          for (i=0; i<width; i++)
1032 +          {
1033 +            *(f++) =
1034 +              (*(p)   <<24) |
1035 +              (*(p+1) <<16) |
1036 +              (*(p+2) << 8) |
1037 +                 (0xff);
1038 +              p+=4;
1039 +          }
1040 +        }
1041 +        free(buf);
1042        } else {
1043          buf = (unsigned char*)malloc(width*height*4);
1044  
1045 @@ -1807,14 +1831,22 @@
1046          info->origin = origin;
1047          glReadPixels(0, viewport_offset, width, height, GL_RGBA, GL_UNSIGNED_BYTE, buf);
1048  
1049 +/*SEB*/
1050 +           unsigned char *p=buf;
1051          for (j=0; j<height; j++)
1052          {
1053 +             short unsigned int *f=frameBuffer+(height-j-1)*width;
1054            for (i=0; i<width; i++)
1055            {
1056 -            frameBuffer[(height-j-1)*width+i] =
1057 +/*            frameBuffer[(height-j-1)*width+i] =
1058                ((buf[j*width*4+i*4+0] >> 3) << 11) |
1059                ((buf[j*width*4+i*4+1] >> 2) <<  5) |
1060 -              (buf[j*width*4+i*4+2] >> 3);
1061 +              (buf[j*width*4+i*4+2] >> 3);*/
1062 +            *(f++) =
1063 +              ((*(p)   >> 3) << 11) |
1064 +              ((*(p+1) >> 2) <<  5) |
1065 +              (*(p+2)  >> 3);
1066 +              p+=4;
1067            }
1068          }
1069          free(buf);
1070 @@ -1826,6 +1858,7 @@
1071        info->strideInBytes = width*2;
1072        info->writeMode = GR_LFBWRITEMODE_ZA16;
1073        info->origin = origin;
1074 +      //*SEB* *TODO* check alignment
1075        glReadPixels(0, viewport_offset, width, height, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, depthBuffer);
1076      }
1077    }
1078 @@ -1855,6 +1888,7 @@
1079    unsigned short *frameBuffer = (unsigned short*)dst_data;
1080    unsigned short *depthBuffer = (unsigned short*)dst_data;
1081    LOG("grLfbReadRegion(%d,%d,%d,%d,%d,%d)\r\n", src_buffer, src_x, src_y, src_width, src_height, dst_stride);
1082 +//printf("grLfbReadRegion(%d,%d,%d,%d,%d,%d)\r\n", src_buffer, src_x, src_y, src_width, src_height, dst_stride);
1083  
1084    switch(src_buffer)
1085    {
1086 @@ -1876,15 +1910,22 @@
1087      buf = (unsigned char*)malloc(src_width*src_height*4);
1088  
1089      glReadPixels(src_x, (viewport_offset)+height-src_y-src_height, src_width, src_height, GL_RGBA, GL_UNSIGNED_BYTE, buf);
1090 -
1091      for (j=0; j<src_height; j++)
1092      {
1093 +/*SEB*/
1094 +      unsigned char *p=buf+(src_height-j-1)*src_width*4;
1095 +      unsigned short *f=frameBuffer+(j*dst_stride/2);
1096        for (i=0; i<src_width; i++)
1097        {
1098 -        frameBuffer[j*(dst_stride/2)+i] =
1099 +/*        frameBuffer[j*(dst_stride/2)+i] =
1100            ((buf[(src_height-j-1)*src_width*4+i*4+0] >> 3) << 11) |
1101            ((buf[(src_height-j-1)*src_width*4+i*4+1] >> 2) <<  5) |
1102 -          (buf[(src_height-j-1)*src_width*4+i*4+2] >> 3);
1103 +          (buf[(src_height-j-1)*src_width*4+i*4+2] >> 3);*/
1104 +        *(f++) =
1105 +          ((*(p) >> 3) << 11) |
1106 +          ((*(p+1) >> 2) <<  5) |
1107 +          (*(p+2) >> 3);
1108 +         p+=4;
1109        }
1110      }
1111      free(buf);
1112 @@ -1892,15 +1933,19 @@
1113    else
1114    {
1115      buf = (unsigned char*)malloc(src_width*src_height*2);
1116 -
1117 -    glReadPixels(src_x, (viewport_offset)+height-src_y-src_height, src_width, src_height, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, depthBuffer);
1118 +//*SEB* read into buf, not depthBuffer.
1119 +    glReadPixels(src_x, (viewport_offset)+height-src_y-src_height, src_width, src_height, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, buf);
1120  
1121      for (j=0;j<src_height; j++)
1122      {
1123 +//*SEB*
1124 +      unsigned short *d=depthBuffer+j*dst_stride/2;
1125 +      unsigned short *p=(unsigned short*)buf+(src_height-j-1)*src_width; //original looks fishy: why *4???
1126        for (i=0; i<src_width; i++)
1127        {
1128 -        depthBuffer[j*(dst_stride/2)+i] =
1129 -          ((unsigned short*)buf)[(src_height-j-1)*src_width*4+i*4];
1130 +/*        depthBuffer[j*(dst_stride/2)+i] =
1131 +          ((unsigned short*)buf)[(src_height-j-1)*src_width*4+i*4];*/
1132 +        *(d++) = *(p++); //why *4 (prob. GL_PACK_ALIGNMENT was 4)? plus the indexing as unsigned short, that makes *8 ???
1133        }
1134      }
1135      free(buf);
1136 @@ -1923,6 +1968,7 @@
1137    int texture_number;
1138    unsigned int tex_width = 1, tex_height = 1;
1139    LOG("grLfbWriteRegion(%d,%d,%d,%d,%d,%d,%d,%d)\r\n",dst_buffer, dst_x, dst_y, src_format, src_width, src_height, pixelPipeline, src_stride);
1140 +//printf("grLfbWriteRegion(%d,%d,%d,%d,%d,%d,%d,%d)\r\n",dst_buffer, dst_x, dst_y, src_format, src_width, src_height, pixelPipeline, src_stride);
1141  
1142    //glPushAttrib(GL_ALL_ATTRIB_BITS);
1143  
1144 @@ -1949,6 +1995,12 @@
1145      glActiveTexture(texture_number);
1146  
1147      const unsigned int half_stride = src_stride / 2;
1148 +
1149 +    const int comp_stride = half_stride - src_width;
1150 +    const int comp_tex = (tex_width - src_width)*4;
1151 +    unsigned short *f=frameBuffer;
1152 +    unsigned char *p=buf;
1153 +
1154      switch(src_format)
1155      {
1156      case GR_LFB_SRC_FMT_1555:
1157 @@ -1956,12 +2008,20 @@
1158        {
1159          for (i=0; i<src_width; i++)
1160          {
1161 -          const unsigned int col = frameBuffer[j*half_stride+i];
1162 +/*          const unsigned int col = frameBuffer[j*half_stride+i];
1163            buf[j*tex_width*4+i*4+0]=((col>>10)&0x1F)<<3;
1164            buf[j*tex_width*4+i*4+1]=((col>>5)&0x1F)<<3;
1165            buf[j*tex_width*4+i*4+2]=((col>>0)&0x1F)<<3;
1166 -          buf[j*tex_width*4+i*4+3]= (col>>15) ? 0xFF : 0;
1167 +          buf[j*tex_width*4+i*4+3]= (col>>15) ? 0xFF : 0;*/
1168 +          const unsigned int col = *(f++);
1169 +          *(p)=((col>>10)&0x1F)<<3;
1170 +          *(p+1)=((col>>5)&0x1F)<<3;
1171 +          *(p+2)=((col>>0)&0x1F)<<3;
1172 +          *(p+3)= (col>>15) ? 0xFF : 0;
1173 +         p+=4;
1174          }
1175 +       p+=comp_tex;
1176 +       f+=comp_stride;
1177        }
1178        break;
1179      case GR_LFBWRITEMODE_555:
1180 @@ -1969,12 +2029,20 @@
1181        {
1182          for (i=0; i<src_width; i++)
1183          {
1184 -          const unsigned int col = frameBuffer[j*half_stride+i];
1185 +/*          const unsigned int col = frameBuffer[j*half_stride+i];
1186            buf[j*tex_width*4+i*4+0]=((col>>10)&0x1F)<<3;
1187            buf[j*tex_width*4+i*4+1]=((col>>5)&0x1F)<<3;
1188            buf[j*tex_width*4+i*4+2]=((col>>0)&0x1F)<<3;
1189 -          buf[j*tex_width*4+i*4+3]=0xFF;
1190 +          buf[j*tex_width*4+i*4+3]=0xFF;*/
1191 +          const unsigned int col = *(f++);
1192 +          *(p)=((col>>10)&0x1F)<<3;
1193 +          *(p+1)=((col>>5)&0x1F)<<3;
1194 +          *(p+2)=((col>>0)&0x1F)<<3;
1195 +          *(p+3)=0xFF;
1196 +         p+=4;
1197          }
1198 +       p+=comp_tex;
1199 +       f+=comp_stride;
1200        }
1201        break;
1202      case GR_LFBWRITEMODE_565:
1203 @@ -1982,12 +2050,20 @@
1204        {
1205          for (i=0; i<src_width; i++)
1206          {
1207 -          const unsigned int col = frameBuffer[j*half_stride+i];
1208 +/*          const unsigned int col = frameBuffer[j*half_stride+i];
1209            buf[j*tex_width*4+i*4+0]=((col>>11)&0x1F)<<3;
1210            buf[j*tex_width*4+i*4+1]=((col>>5)&0x3F)<<2;
1211            buf[j*tex_width*4+i*4+2]=((col>>0)&0x1F)<<3;
1212 -          buf[j*tex_width*4+i*4+3]=0xFF;
1213 +          buf[j*tex_width*4+i*4+3]=0xFF;*/
1214 +          const unsigned int col = *(f++);
1215 +          *(p)=((col>>11)&0x1F)<<3;
1216 +          *(p+1)=((col>>5)&0x3F)<<2;
1217 +          *(p+2)=((col>>0)&0x1F)<<3;
1218 +          *(p+3)=0xFF;
1219 +         p+=4;
1220          }
1221 +       p+=comp_tex;
1222 +       f+=comp_stride;
1223        }
1224        break;
1225      default:
1226 @@ -2006,7 +2082,7 @@
1227  #endif
1228  
1229      glBindTexture(GL_TEXTURE_2D, default_texture);
1230 -    glTexImage2D(GL_TEXTURE_2D, 0, 4, tex_width, tex_height, 0, GL_RGBA, GL_UNSIGNED_BYTE, buf);
1231 +    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex_width, tex_height, 0, GL_RGBA, GL_UNSIGNED_BYTE, buf);
1232      free(buf);
1233  
1234      set_copy_shader();
1235 diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glitch64/textures.cpp ./Glitch64/textures.cpp
1236 --- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glitch64/textures.cpp   2013-09-06 22:05:31.000000000 +0200
1237 +++ ./Glitch64/textures.cpp     2013-09-13 11:32:50.000000000 +0200
1238 @@ -26,6 +26,7 @@
1239  #include "glide.h"
1240  #include "main.h"
1241  #include <stdio.h>
1242 +#include <string.h>
1243  
1244  /* Napalm extensions to GrTextureFormat_t */
1245  #define GR_TEXFMT_ARGB_CMP_FXT1           0x11
1246 @@ -107,7 +108,7 @@
1247    }
1248    glDeleteTextures(n, t);
1249    free(t);
1250 -  //printf("RMVTEX nbtex is now %d (%06x - %06x)\n", nbTex, idmin, idmax);
1251 +//printf("RMVTEX nbtex is now %d (%06x - %06x)\n", nbTex, idmin, idmax);
1252  }
1253  
1254  
1255 @@ -115,7 +116,7 @@
1256  {
1257    texlist *aux = list;
1258    texlist *aux2;
1259 -  //printf("ADDTEX nbtex is now %d (%06x)\n", nbTex, id);
1260 +//printf("ADDTEX nbtex is now %d (%06x)\n", nbTex, id);
1261    if (list == NULL || id < list->id)
1262    {
1263      nbTex++;
1264 @@ -435,8 +436,11 @@
1265      factor = -1;
1266    else
1267      factor = grTexFormat2GLPackedFmt(info->format, &gltexfmt, &glpixfmt, &glpackfmt);
1268 -
1269 +//printf("grTexDownloadMipmap, id=%x, size=%ix%i, format=%x\n", startAddress+1, width, height, info->format);
1270    if (factor < 0) {
1271 +    gltexfmt = GL_RGBA;
1272 +    glpixfmt = GL_RGBA;
1273 +    glpackfmt = GL_UNSIGNED_BYTE;
1274  
1275      // VP fixed the texture conversions to be more accurate, also swapped
1276      // the for i/j loops so that is is less likely to break the memory cache
1277 @@ -444,7 +448,7 @@
1278      switch(info->format)
1279      {
1280      case GR_TEXFMT_ALPHA_8:
1281 -      for (i=0; i<height; i++)
1282 + /*     for (i=0; i<height; i++)
1283        {
1284          for (j=0; j<width; j++)
1285          {
1286 @@ -457,10 +461,25 @@
1287          }
1288        }
1289        factor = 1;
1290 -      glformat = GL_RGBA;
1291 +      glformat = GL_RGBA;*/
1292 +
1293 +     for (i=0; i<height; i++)
1294 +      {
1295 +        for (j=0; j<width; j++)
1296 +        {
1297 +          unsigned short texel = (unsigned short)((unsigned char*)info->data)[m];
1298 +          ((unsigned short*)texture)[n] = texel|(texel<<8);
1299 +          m++;
1300 +          n++;
1301 +        }
1302 +      }
1303 +
1304 +      glformat = gltexfmt = glpixfmt = GL_LUMINANCE_ALPHA;
1305 +      glpackfmt = GL_UNSIGNED_BYTE;
1306 +      factor = 1;
1307        break;
1308      case GR_TEXFMT_INTENSITY_8: // I8 support - H.Morii
1309 -      for (i=0; i<height; i++)
1310 +/*      for (i=0; i<height; i++)
1311        {
1312          for (j=0; j<width; j++)
1313          {
1314 @@ -470,9 +489,13 @@
1315            m++;
1316            n++;
1317          }
1318 -      }
1319 +      }*/
1320 +      factor = 1;
1321 +//      glformat = GL_ALPHA;
1322 +      memcpy(texture, info->data, width*height);
1323 +      glformat = gltexfmt = glpixfmt = GL_LUMINANCE;
1324 +      glpackfmt = GL_UNSIGNED_BYTE;
1325        factor = 1;
1326 -      glformat = GL_ALPHA;
1327        break;
1328      case GR_TEXFMT_ALPHA_INTENSITY_44:
1329  #if 1
1330 @@ -480,9 +503,9 @@
1331        {
1332          for (j=0; j<width; j++)
1333          {
1334 -          unsigned int texel = (unsigned int)((unsigned char*)info->data)[m];
1335 +/*          unsigned int texel = (unsigned int)((unsigned char*)info->data)[m];
1336  #if 1
1337 -          /* accurate conversion */
1338 +          // accurate conversion
1339            unsigned int texel_hi = (texel & 0x000000F0) << 20;
1340            unsigned int texel_low = texel & 0x0000000F;
1341            texel_low |= (texel_low << 4);
1342 @@ -493,61 +516,90 @@
1343            texel_hi |= ((texel_low << 16) | (texel_low << 8) | texel_low);
1344  #endif
1345            ((unsigned int*)texture)[n] = texel_hi;
1346 +*/
1347 +         unsigned char texel = ((unsigned char*)info->data)[m];
1348 +          unsigned short texel_hi = (texel & 0x000000F0) << 4;
1349 +          unsigned short texel_low = texel & 0x0000000F;
1350 +          texel_low |= (texel_low << 4);
1351 +          texel_hi |= ((texel_hi << 4) | (texel_low));
1352 +         ((unsigned short*)texture)[n] = texel_hi;
1353            m++;
1354            n++;
1355          }
1356        }
1357        factor = 1;
1358 -      glformat = GL_LUMINANCE_ALPHA;
1359 +      glformat = gltexfmt = glpixfmt = GL_LUMINANCE_ALPHA;
1360 +      glpackfmt = GL_UNSIGNED_BYTE;
1361 +//      glformat = GL_LUMINANCE_ALPHA;
1362  #endif
1363        break;
1364      case GR_TEXFMT_RGB_565:
1365 -      for (i=0; i<height; i++)
1366 +/*      for (i=0; i<height; i++)
1367        {
1368          for (j=0; j<width; j++)
1369 -        {
1370 -          unsigned int texel = (unsigned int)((unsigned short*)info->data)[m];
1371 +        {*/
1372 +/*          unsigned int texel = (unsigned int)((unsigned short*)info->data)[m];
1373            unsigned int B = texel & 0x0000F800;
1374            unsigned int G = texel & 0x000007E0;
1375            unsigned int R = texel & 0x0000001F;
1376  #if 0
1377 -          /* accurate conversion */
1378 +          // accurate conversion 
1379            ((unsigned int*)texture)[n] = 0xFF000000 | (R << 19) | ((R >> 2) << 16) | (G << 5) | ((G >> 9) << 8) | (B >> 8) | (B >> 13);
1380  #else
1381            ((unsigned int*)texture)[n] = 0xFF000000 | (R << 19) | (G << 5) | (B >> 8);
1382  #endif
1383 +*/
1384 +/*       const unsigned short texel = ((unsigned short*)info->data)[m];
1385 +          const unsigned short B = (texel & 0xF800)>>11;
1386 +          const unsigned short G = texel & 0x07E0;
1387 +          const unsigned short R = (texel & 0x001F)<<11;
1388 +          ((unsigned short*)texture)[n] = R|G|B;
1389            m++;
1390            n++;
1391          }
1392 -      }
1393 +      }*/
1394 +      memcpy(texture, info->data, width*height*2);
1395        factor = 2;
1396 -      glformat = GL_RGB;
1397 +//      glformat = GL_RGB;
1398 +      glformat = gltexfmt = glpixfmt = GL_RGB;
1399 +      glpackfmt = GL_UNSIGNED_SHORT_5_6_5;
1400        break;
1401      case GR_TEXFMT_ARGB_1555:
1402        for (i=0; i<height; i++)
1403        {
1404          for (j=0; j<width; j++)
1405          {
1406 -          unsigned int texel = (unsigned int)((unsigned short*)info->data)[m];
1407 +/*          unsigned int texel = (unsigned int)((unsigned short*)info->data)[m];
1408            unsigned int A = texel & 0x00008000 ? 0xFF000000 : 0;
1409            unsigned int B = texel & 0x00007C00;
1410            unsigned int G = texel & 0x000003E0;
1411            unsigned int R = texel & 0x0000001F;
1412  #if 0
1413 -          /* accurate conversion */
1414 +          // accurate conversion
1415            ((unsigned int*)texture)[n] = A | (R << 19) | ((R >> 2) << 16) | (G << 6) | ((G >> 8) << 8) | (B >> 7) | (B >> 12);
1416  #else
1417            ((unsigned int*)texture)[n] = A | (R << 19) | (G << 6) | (B >> 7);
1418  #endif
1419 +*/
1420 +          unsigned short texel = ((unsigned short*)info->data)[m];
1421 +          unsigned short A = (texel & 0x8000)>>15;
1422 +         ((unsigned short*)texture)[n] = A|(texel&0x7fff)<<1;
1423 +/*
1424 +          unsigned short B = (texel & 0x7C00)>>9;
1425 +          unsigned short G = texel & 0x03E0<<1;
1426 +          unsigned short R = (texel & 0x001F)<<11;
1427 +          ((unsigned short*)texture)[n] = A|R|G|B;*/
1428            m++;
1429            n++;
1430          }
1431        }
1432        factor = 2;
1433 -      glformat = GL_RGBA;
1434 +//      glformat = GL_RGBA;
1435 +      glformat = gltexfmt = glpixfmt = GL_RGBA;
1436 +      glpackfmt = GL_UNSIGNED_SHORT_5_5_5_1;
1437        break;
1438      case GR_TEXFMT_ALPHA_INTENSITY_88:
1439 -      for (i=0; i<height; i++)
1440 +/*      for (i=0; i<height; i++)
1441        {
1442          for (j=0; j<width; j++)
1443          {
1444 @@ -557,9 +609,12 @@
1445            m++;
1446            n++;
1447          }
1448 -      }
1449 +      }*/
1450 +      memcpy(texture, info->data, width*height*2);
1451        factor = 2;
1452        glformat = GL_LUMINANCE_ALPHA;
1453 +      glformat = gltexfmt = glpixfmt = GL_LUMINANCE_ALPHA;
1454 +      glpackfmt = GL_UNSIGNED_BYTE;
1455        break;
1456      case GR_TEXFMT_ARGB_4444:
1457  
1458 @@ -567,23 +622,29 @@
1459        {
1460          for (j=0; j<width; j++)
1461          {
1462 -          unsigned int texel = (unsigned int)((unsigned short*)info->data)[m];
1463 +/*          unsigned int texel = (unsigned int)((unsigned short*)info->data)[m];
1464            unsigned int A = texel & 0x0000F000;
1465            unsigned int B = texel & 0x00000F00;
1466            unsigned int G = texel & 0x000000F0;
1467            unsigned int R = texel & 0x0000000F;
1468  #if 0
1469 -          /* accurate conversion */
1470 +          // accurate conversion
1471            ((unsigned int*)texture)[n] = (A << 16) | (A << 12) | (R << 20) | (R << 16) | (G << 8) | (G << 4) | (B >> 4) | (B >> 8);
1472  #else
1473            ((unsigned int*)texture)[n] = (A << 16) | (R << 20) | (G << 8) | (B >> 4);
1474  #endif
1475 +*/
1476 +          unsigned short texel = ((unsigned short*)info->data)[m];
1477 +          unsigned int A = (texel & 0xF000)>>12;
1478 +          ((unsigned short*)texture)[n] = A|(texel&0x0fff)<<4;
1479            m++;
1480            n++;
1481          }
1482        }
1483        factor = 2;
1484        glformat = GL_RGBA;
1485 +      glformat = gltexfmt = glpixfmt = GL_RGBA;
1486 +      glpackfmt = GL_UNSIGNED_SHORT_4_4_4_4;
1487        break;
1488      case GR_TEXFMT_ARGB_8888:
1489        for (i=0; i<height; i++)
1490 @@ -650,7 +711,9 @@
1491    if (largest_supported_anisotropy > 1.0f)
1492      glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, largest_supported_anisotropy);
1493  
1494 -  glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, texture);
1495 +//*SEB*  glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, texture);
1496 +//printf("new texture, id=%x, size=%ix%i, fmt=%x/%x\n", startAddress+1, width, height, gltexfmt, glpackfmt);
1497 +  glTexImage2D(GL_TEXTURE_2D, 0, gltexfmt, width, height, 0, glpixfmt, glpackfmt, texture);
1498  /*
1499    switch(info->format)
1500    {