1 diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/3dmath.cpp ./Glide64/3dmath.cpp
2 --- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/3dmath.cpp 2013-09-06 22:05:28.000000000 +0200
3 +++ ./Glide64/3dmath.cpp 2013-09-14 09:41:13.000000000 +0200
9 +void MultMatrix_neon( float m0[4][4], float m1[4][4], float dest[4][4]) //dest[i][j] = sum over k of m1[i][k] * m0[k][j]
12 + "vld1.32 {d0, d1}, [%1]! \n\t" //q0 = m1 (row 0)
13 + "vld1.32 {d2, d3}, [%1]! \n\t" //q1 = m1+4 (row 1)
14 + "vld1.32 {d4, d5}, [%1]! \n\t" //q2 = m1+8 (row 2)
15 + "vld1.32 {d6, d7}, [%1] \n\t" //q3 = m1+12 (row 3)
16 + "vld1.32 {d16, d17}, [%0]! \n\t" //q8 = m0 (row 0)
17 + "vld1.32 {d18, d19}, [%0]! \n\t" //q9 = m0+4 (row 1)
18 + "vld1.32 {d20, d21}, [%0]! \n\t" //q10 = m0+8 (row 2)
19 + "vld1.32 {d22, d23}, [%0] \n\t" //q11 = m0+12 (row 3)
21 + "vmul.f32 q12, q8, d0[0] \n\t" //q12 = q8 * d0[0]
22 + "vmul.f32 q13, q8, d2[0] \n\t" //q13 = q8 * d2[0]
23 + "vmul.f32 q14, q8, d4[0] \n\t" //q14 = q8 * d4[0]
24 + "vmul.f32 q15, q8, d6[0] \n\t" //q15 = q8 * d6[0]
25 + "vmla.f32 q12, q9, d0[1] \n\t" //q12 += q9 * d0[1]
26 + "vmla.f32 q13, q9, d2[1] \n\t" //q13 += q9 * d2[1]
27 + "vmla.f32 q14, q9, d4[1] \n\t" //q14 += q9 * d4[1]
28 + "vmla.f32 q15, q9, d6[1] \n\t" //q15 += q9 * d6[1]
29 + "vmla.f32 q12, q10, d1[0] \n\t" //q12 += q10 * d1[0]
30 + "vmla.f32 q13, q10, d3[0] \n\t" //q13 += q10 * d3[0]
31 + "vmla.f32 q14, q10, d5[0] \n\t" //q14 += q10 * d5[0]
32 + "vmla.f32 q15, q10, d7[0] \n\t" //q15 += q10 * d7[0]
33 + "vmla.f32 q12, q11, d1[1] \n\t" //q12 += q11 * d1[1]
34 + "vmla.f32 q13, q11, d3[1] \n\t" //q13 += q11 * d3[1]
35 + "vmla.f32 q14, q11, d5[1] \n\t" //q14 += q11 * d5[1]
36 + "vmla.f32 q15, q11, d7[1] \n\t" //q15 += q11 * d7[1]
38 + "vst1.32 {d24, d25}, [%2]! \n\t" //dest = q12 (row 0)
39 + "vst1.32 {d26, d27}, [%2]! \n\t" //dest+4 = q13 (row 1)
40 + "vst1.32 {d28, d29}, [%2]! \n\t" //dest+8 = q14 (row 2)
41 + "vst1.32 {d30, d31}, [%2] \n\t" //dest+12 = q15 (row 3)
43 + :"+r"(m0), "+r"(m1), "+r"(dest):
44 + : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
45 + "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
46 + "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31",
51 +void Normalize_neon(float v[3]) //scales the 3 floats at %0 in place by ~1/sqrt(x*x+y*y+z*z); operand list not shown here, presumably %0 is v
54 + "vld1.32 {d4}, [%0]! \n\t" //d4={x,y}, %0 advances by 8 bytes
55 + "flds s10, [%0] \n\t" //d5[0] = z
56 + "sub %0, %0, #8 \n\t" //rewind %0 to the start of the vector
57 + "vmul.f32 d0, d4, d4 \n\t" //d0 = d4*d4 = {x*x, y*y}
58 + "vpadd.f32 d0, d0, d0 \n\t" //d0 = d0[0] + d0[1] (in both lanes)
59 + "vmla.f32 d0, d5, d5 \n\t" //d0 = d0 + d5*d5 (lane 0 = x*x+y*y+z*z)
61 + "vmov.f32 d1, d0 \n\t" //d1 = d0
62 + "vrsqrte.f32 d0, d0 \n\t" //d0 = ~ 1.0 / sqrt(d0)
63 + "vmul.f32 d2, d0, d1 \n\t" //d2 = d0 * d1
64 + "vrsqrts.f32 d3, d2, d0 \n\t" //d3 = (3 - d0 * d2) / 2
65 + "vmul.f32 d0, d0, d3 \n\t" //d0 = d0 * d3 (first refinement of the estimate)
66 + "vmul.f32 d2, d0, d1 \n\t" //d2 = d0 * d1
67 + "vrsqrts.f32 d3, d2, d0 \n\t" //d3 = (3 - d0 * d2) / 2
68 + "vmul.f32 d0, d0, d3 \n\t" //d0 = d0 * d3 (second refinement of the estimate)
70 + "vmul.f32 q2, q2, d0[0] \n\t" //q2 = q2 * d0[0]
71 + "vst1.32 {d4}, [%0]! \n\t" //store {x,y}, %0 advances by 8 bytes
72 + "fsts s10, [%0] \n\t" //store z
75 + : "d0", "d1", "d2", "d3", "d4", "d5", "memory"
79 +float DotProduct_neon( float v0[3], float v1[3] ) //computes x0*x1 + y0*y1 + z0*z1 into dot
83 + "vld1.32 {d8}, [%1]! \n\t" //d8={x0,y0}
84 + "vld1.32 {d10}, [%2]! \n\t" //d10={x1,y1}
85 + "flds s18, [%1, #0] \n\t" //d9[0]={z0}
86 + "flds s22, [%2, #0] \n\t" //d11[0]={z1}
87 + "vmul.f32 d12, d8, d10 \n\t" //d12 = d8*d10 = {x0*x1, y0*y1}
88 + "vpadd.f32 d12, d12, d12 \n\t" //d12 = d12[0] + d12[1]
89 + "vmla.f32 d12, d9, d11 \n\t" //d12 = d12 + d9*d11 (lane 0 adds z0*z1)
90 + "fmrs %0, s24 \n\t" //%0 (dot) = s24 = d12[0]
91 + : "=r"(dot), "+r"(v0), "+r"(v1):
92 + : "d8", "d9", "d10", "d11", "d12"
100 // 2008.03.29 H.Morii - added SSE 3DNOW! 3x3 1x3 matrix multiplication
101 // and 3DNOW! 4x4 4x4 matrix multiplication
102 // 2011-01-03 Balrog - removed because is in NASM format and not 64-bit compatible
103 // This will need fixing.
104 +#ifndef __ARM_NEON__
105 MULMATRIX MulMatrices = MulMatricesC;
106 TRANSFORMVECTOR TransformVector = TransformVectorC;
107 TRANSFORMVECTOR InverseTransformVector = InverseTransformVectorC;
108 DOTPRODUCT DotProduct = DotProductC;
109 NORMALIZEVECTOR NormalizeVector = NormalizeVectorC;
112 void MulMatricesSSE(float m1[4][4],float m2[4][4],float r[4][4])
118 +#ifndef __ARM_NEON__
121 #if defined(__GNUC__) && !defined(NO_ASM) && !defined(NOSSE)
126 +#endif //__ARM_NEON__
128 diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/3dmath.h ./Glide64/3dmath.h
129 --- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/3dmath.h 2013-09-06 22:05:28.000000000 +0200
130 +++ ./Glide64/3dmath.h 2013-09-14 19:01:12.000000000 +0200
132 void calc_sphere (VERTEX *v);
136 +float DotProductC(register float *v1, register float *v2);
137 +void NormalizeVectorC(float *v);
138 +void TransformVectorC(float *src, float *dst, float mat[4][4]);
139 +void InverseTransformVectorC (float *src, float *dst, float mat[4][4]);
140 +void MulMatricesC(float m1[4][4],float m2[4][4],float r[4][4]);
141 +void MultMatrix_neon( float m0[4][4], float m1[4][4], float dest[4][4]);
142 +void Normalize_neon(float v[3]);
143 +float DotProduct_neon( float v0[3], float v1[3] );
145 +#define MulMatrices MulMatricesC //MultMatrix_neon
146 +#define TransformVector TransformVectorC
147 +#define InverseTransformVector InverseTransformVectorC
148 +#define DotProduct DotProductC //DotProduct_neon
149 +#define NormalizeVector NormalizeVectorC //Normalize_neon
151 typedef void (*MULMATRIX)(float m1[4][4],float m2[4][4],float r[4][4]);
152 extern MULMATRIX MulMatrices;
153 typedef void (*TRANSFORMVECTOR)(float *src,float *dst,float mat[4][4]);
155 extern DOTPRODUCT DotProduct;
156 typedef void (*NORMALIZEVECTOR)(float *v);
157 extern NORMALIZEVECTOR NormalizeVector;
159 diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/3dmathneon.cpp ./Glide64/3dmathneon.cpp
160 --- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/3dmathneon.cpp 1970-01-01 01:00:00.000000000 +0100
161 +++ ./Glide64/3dmathneon.cpp 2013-09-13 23:05:47.000000000 +0200
165 +static void MultMatrix_neon( float m0[4][4], float m1[4][4], float dest[4][4]) //dest[i][j] = sum over k of m1[i][k] * m0[k][j]
168 + "vld1.32 {d0, d1}, [%1]! \n\t" //q0 = m1 (row 0)
169 + "vld1.32 {d2, d3}, [%1]! \n\t" //q1 = m1+4 (row 1)
170 + "vld1.32 {d4, d5}, [%1]! \n\t" //q2 = m1+8 (row 2)
171 + "vld1.32 {d6, d7}, [%1] \n\t" //q3 = m1+12 (row 3)
172 + "vld1.32 {d16, d17}, [%0]! \n\t" //q8 = m0 (row 0)
173 + "vld1.32 {d18, d19}, [%0]! \n\t" //q9 = m0+4 (row 1)
174 + "vld1.32 {d20, d21}, [%0]! \n\t" //q10 = m0+8 (row 2)
175 + "vld1.32 {d22, d23}, [%0] \n\t" //q11 = m0+12 (row 3)
177 + "vmul.f32 q12, q8, d0[0] \n\t" //q12 = q8 * d0[0]
178 + "vmul.f32 q13, q8, d2[0] \n\t" //q13 = q8 * d2[0]
179 + "vmul.f32 q14, q8, d4[0] \n\t" //q14 = q8 * d4[0]
180 + "vmul.f32 q15, q8, d6[0] \n\t" //q15 = q8 * d6[0]
181 + "vmla.f32 q12, q9, d0[1] \n\t" //q12 += q9 * d0[1]
182 + "vmla.f32 q13, q9, d2[1] \n\t" //q13 += q9 * d2[1]
183 + "vmla.f32 q14, q9, d4[1] \n\t" //q14 += q9 * d4[1]
184 + "vmla.f32 q15, q9, d6[1] \n\t" //q15 += q9 * d6[1]
185 + "vmla.f32 q12, q10, d1[0] \n\t" //q12 += q10 * d1[0]
186 + "vmla.f32 q13, q10, d3[0] \n\t" //q13 += q10 * d3[0]
187 + "vmla.f32 q14, q10, d5[0] \n\t" //q14 += q10 * d5[0]
188 + "vmla.f32 q15, q10, d7[0] \n\t" //q15 += q10 * d7[0]
189 + "vmla.f32 q12, q11, d1[1] \n\t" //q12 += q11 * d1[1]
190 + "vmla.f32 q13, q11, d3[1] \n\t" //q13 += q11 * d3[1]
191 + "vmla.f32 q14, q11, d5[1] \n\t" //q14 += q11 * d5[1]
192 + "vmla.f32 q15, q11, d7[1] \n\t" //q15 += q11 * d7[1]
194 + "vst1.32 {d24, d25}, [%2]! \n\t" //dest = q12 (row 0)
195 + "vst1.32 {d26, d27}, [%2]! \n\t" //dest+4 = q13 (row 1)
196 + "vst1.32 {d28, d29}, [%2]! \n\t" //dest+8 = q14 (row 2)
197 + "vst1.32 {d30, d31}, [%2] \n\t" //dest+12 = q15 (row 3)
199 + :"+r"(m0), "+r"(m1), "+r"(dest):
200 + : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
201 + "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
202 + "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31",
207 +static void TransformVectorNormalize_neon(float vec[3], float mtx[4][4]) //vec[j] = sum over k<3 of vec[k]*mtx[k][j], then scaled by ~1/length, written back in place
210 + "vld1.32 {d0}, [%1] \n\t" //d0 = {vec[0], vec[1]}
211 + "flds s2, [%1, #8] \n\t" //d1[0] = vec[2]
212 + "vld1.32 {d18, d19}, [%0]! \n\t" //q9 = mtx (row 0)
213 + "vld1.32 {d20, d21}, [%0]! \n\t" //q10 = mtx+4 (row 1)
214 + "vld1.32 {d22, d23}, [%0] \n\t" //q11 = mtx+8 (row 2)
216 + "vmul.f32 q2, q9, d0[0] \n\t" //q2 = q9 * d0[0]
217 + "vmla.f32 q2, q10, d0[1] \n\t" //q2 += q10 * d0[1]
218 + "vmla.f32 q2, q11, d1[0] \n\t" //q2 += q11 * d1[0]
220 + "vmul.f32 d0, d4, d4 \n\t" //d0 = d4*d4 = {x*x, y*y}
221 + "vpadd.f32 d0, d0, d0 \n\t" //d0 = d0[0] + d0[1] (in both lanes)
222 + "vmla.f32 d0, d5, d5 \n\t" //d0 = d0 + d5*d5 (lane 0 = x*x+y*y+z*z)
224 + "vmov.f32 d1, d0 \n\t" //d1 = d0
225 + "vrsqrte.f32 d0, d0 \n\t" //d0 = ~ 1.0 / sqrt(d0)
226 + "vmul.f32 d2, d0, d1 \n\t" //d2 = d0 * d1
227 + "vrsqrts.f32 d3, d2, d0 \n\t" //d3 = (3 - d0 * d2) / 2
228 + "vmul.f32 d0, d0, d3 \n\t" //d0 = d0 * d3 (first refinement of the estimate)
229 + "vmul.f32 d2, d0, d1 \n\t" //d2 = d0 * d1
230 + "vrsqrts.f32 d3, d2, d0 \n\t" //d3 = (3 - d0 * d2) / 2
231 + "vmul.f32 d0, d0, d3 \n\t" //d0 = d0 * d3 (second refinement of the estimate)
233 + "vmul.f32 q2, q2, d0[0] \n\t" //q2 = q2 * d0[0]
235 + "vst1.32 {d4}, [%1] \n\t" //vec[0..1] = d4
236 + "fsts s10, [%1, #8] \n\t" //vec[2] = d5[0]
237 + : "+r"(mtx): "r"(vec)
238 + : "d0","d1","d2","d3","d4","d5","d18","d19","d20","d21","d22", "d23", "memory" //d4/d5 (q2) are written above, so they must be declared clobbered
242 +static void Normalize_neon(float v[3]) //scales the 3 floats at %0 in place by ~1/sqrt(x*x+y*y+z*z); operand list not shown here, presumably %0 is v
245 + "vld1.32 {d4}, [%0]! \n\t" //d4={x,y}, %0 advances by 8 bytes
246 + "flds s10, [%0] \n\t" //d5[0] = z
247 + "sub %0, %0, #8 \n\t" //rewind %0 to the start of the vector
248 + "vmul.f32 d0, d4, d4 \n\t" //d0 = d4*d4 = {x*x, y*y}
249 + "vpadd.f32 d0, d0, d0 \n\t" //d0 = d0[0] + d0[1] (in both lanes)
250 + "vmla.f32 d0, d5, d5 \n\t" //d0 = d0 + d5*d5 (lane 0 = x*x+y*y+z*z)
252 + "vmov.f32 d1, d0 \n\t" //d1 = d0
253 + "vrsqrte.f32 d0, d0 \n\t" //d0 = ~ 1.0 / sqrt(d0)
254 + "vmul.f32 d2, d0, d1 \n\t" //d2 = d0 * d1
255 + "vrsqrts.f32 d3, d2, d0 \n\t" //d3 = (3 - d0 * d2) / 2
256 + "vmul.f32 d0, d0, d3 \n\t" //d0 = d0 * d3 (first refinement of the estimate)
257 + "vmul.f32 d2, d0, d1 \n\t" //d2 = d0 * d1
258 + "vrsqrts.f32 d3, d2, d0 \n\t" //d3 = (3 - d0 * d2) / 2
259 + "vmul.f32 d0, d0, d3 \n\t" //d0 = d0 * d3 (second refinement of the estimate)
261 + "vmul.f32 q2, q2, d0[0] \n\t" //q2 = q2 * d0[0]
262 + "vst1.32 {d4}, [%0]! \n\t" //store {x,y}, %0 advances by 8 bytes
263 + "fsts s10, [%0] \n\t" //store z
266 + : "d0", "d1", "d2", "d3", "d4", "d5", "memory"
270 +static float DotProduct_neon( float v0[3], float v1[3] ) //computes x0*x1 + y0*y1 + z0*z1 into dot
274 + "vld1.32 {d8}, [%1]! \n\t" //d8={x0,y0}
275 + "vld1.32 {d10}, [%2]! \n\t" //d10={x1,y1}
276 + "flds s18, [%1, #0] \n\t" //d9[0]={z0}
277 + "flds s22, [%2, #0] \n\t" //d11[0]={z1}
278 + "vmul.f32 d12, d8, d10 \n\t" //d12 = d8*d10 = {x0*x1, y0*y1}
279 + "vpadd.f32 d12, d12, d12 \n\t" //d12 = d12[0] + d12[1]
280 + "vmla.f32 d12, d9, d11 \n\t" //d12 = d12 + d9*d11 (lane 0 adds z0*z1)
281 + "fmrs %0, s24 \n\t" //%0 (dot) = s24 = d12[0]
282 + : "=r"(dot), "+r"(v0), "+r"(v1):
283 + : "d8", "d9", "d10", "d11", "d12"
291 + MulMatrices = MultMatrix_neon;
292 + //TransformVectorNormalize = TransformVectorNormalize_neon;
293 + NormalizeVector = Normalize_neon;
294 + DotProduct = DotProduct_neon;
296 diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/Config.cpp ./Glide64/Config.cpp
297 --- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/Config.cpp 2013-09-06 22:05:29.000000000 +0200
298 +++ ./Glide64/Config.cpp 2013-09-07 10:51:27.000000000 +0200
308 diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/CRC.cpp ./Glide64/CRC.cpp
309 --- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/CRC.cpp 2013-09-06 22:05:28.000000000 +0200
310 +++ ./Glide64/CRC.cpp 2013-09-08 13:12:00.000000000 +0200
313 //****************************************************************
316 #define CRC32_POLYNOMIAL 0x04C11DB7
318 unsigned int CRCTable[ 256 ];
324 diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/FBtoScreen.cpp ./Glide64/FBtoScreen.cpp
325 --- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/FBtoScreen.cpp 2013-09-06 22:05:29.000000000 +0200
326 +++ ./Glide64/FBtoScreen.cpp 2013-09-08 11:57:33.000000000 +0200
327 @@ -165,12 +165,15 @@
328 for (wxUint32 w = 0; w < 256; w++)
331 - r = (wxUint8)((col >> 24)&0xFF);
332 + r = (wxUint8)((col >> (24+3))&0x1F);
333 + g = (wxUint8)((col >> (16+2))&0x3F);
334 + b = (wxUint8)((col >> (8+3))&0x1F);
335 +/* r = (wxUint8)((col >> 24)&0xFF);
336 r = (wxUint8)((float)r / 255.0f * 31.0f);
337 g = (wxUint8)((col >> 16)&0xFF);
338 g = (wxUint8)((float)g / 255.0f * 63.0f);
339 b = (wxUint8)((col >> 8)&0xFF);
340 - b = (wxUint8)((float)b / 255.0f * 31.0f);
341 + b = (wxUint8)((float)b / 255.0f * 31.0f);*/ //*SEB*
342 *(dst++) = (r << 11) | (g << 5) | b;
344 src += (fb_info.width - 256);
345 @@ -261,12 +264,15 @@
349 - r = (wxUint8)((c32 >> 24)&0xFF);
350 + r = (wxUint8)((c32 >> (24+3))&0x1F);
351 + g = (wxUint8)((c32 >> (16+2))&0x3F);
352 + b = (wxUint8)((c32 >> (8+3))&0x1F);
353 +/* r = (wxUint8)((c32 >> 24)&0xFF);
354 r = (wxUint8)((float)r / 255.0f * 31.0f);
355 g = (wxUint8)((c32 >> 16)&0xFF);
356 g = (wxUint8)((float)g / 255.0f * 63.0f);
357 b = (wxUint8)((c32 >> 8)&0xFF);
358 - b = (wxUint8)((float)b / 255.0f * 31.0f);
359 + b = (wxUint8)((float)b / 255.0f * 31.0f);*/ //*SEB*
360 a = (c32&0xFF) ? 1 : 0;
361 *(dst++) = (a<<15) | (r << 10) | (g << 5) | b;
363 diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/Gfx_1.3.h ./Glide64/Gfx_1.3.h
364 --- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/Gfx_1.3.h 2013-09-06 22:05:29.000000000 +0200
365 +++ ./Glide64/Gfx_1.3.h 2013-09-08 16:22:57.000000000 +0200
367 // ** TAKE OUT BEFORE RELEASE!!! **
368 //#define LOGGING // log of spec functions called
369 //#define LOG_KEY // says "Key!!!" in the log when space bar is pressed
370 +//#define EXT_LOGGING
371 +//#define PERFORMANCE
375 @@ -120,15 +122,15 @@
377 #define FPS // fps counter able? (not enabled necessarily)
379 -#define LOGNOTKEY // Log if not pressing:
380 -#define LOGKEY 0x11 // this key (CONTROL)
381 +//#define LOGNOTKEY // Log if not pressing:
382 +//#define LOGKEY 0x11 // this key (CONTROL)
384 //#define LOG_COMMANDS // log the whole 64-bit command as (0x........, 0x........)
386 #define CATCH_EXCEPTIONS // catch exceptions so it doesn't freeze and will report
387 // "The gfx plugin has caused an exception" instead.
389 -#define FLUSH // flush the file buffer. slower logging, but makes sure
390 +//#define FLUSH // flush the file buffer. slower logging, but makes sure
391 // the command is logged before continuing (in case of
392 // crash or exception, the log will not be cut short)
393 #ifndef _ENDUSER_RELEASE_
398 -#define LARGE_TEXTURE_HANDLING // allow large-textured objects to be split?
399 +//#define LARGE_TEXTURE_HANDLING // allow large-textured objects to be split?
402 extern HHOOK hhkLowLevelKybd;
405 int CheckKeyPressed(int key, int mask);
407 -//#define PERFORMANCE
409 extern int64 perf_cur;
410 extern int64 perf_next;
411 diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/Main.cpp ./Glide64/Main.cpp
412 --- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/Main.cpp 2013-09-06 22:05:29.000000000 +0200
413 +++ ./Glide64/Main.cpp 2013-09-15 17:06:29.000000000 +0200
415 // 60=0x0, 70=0x1, 72=0x2, 75=0x3, 80=0x4, 90=0x5, 100=0x6, 85=0x7, 120=0x8, none=0xff
418 -#include "ae_bridge.h"
419 +//#include "ae_bridge.h"
420 #include "FrameSkipper.h"
421 FrameSkipper frameSkipper;
423 @@ -1768,12 +1768,13 @@
424 EXPORT void CALL RomClosed (void)
426 VLOG ("RomClosed ()\n");
427 +printf("RomClosed ()\n");
431 rdp.window_changed = TRUE;
433 - if (fullscreen && evoodoo)
434 +// if (fullscreen && evoodoo)//*SEB*
438 @@ -1973,9 +1974,6 @@
439 wxUint32 update_screen_count = 0;
440 EXPORT void CALL UpdateScreen (void)
443 - frameSkipper.update();
446 if (CheckKeyPressed(G64_VK_SPACE, 0x0001))
448 @@ -2020,6 +2018,9 @@
453 + frameSkipper.update();
458 @@ -2035,11 +2036,17 @@
459 rdp.updatescreen = 1;
463 + frameSkipper.update();
468 if (settings.swapmode == 0)
471 + frameSkipper.update();
475 static void DrawWholeFrameBufferToScreen()
476 diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/rdp.cpp ./Glide64/rdp.cpp
477 --- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/rdp.cpp 2013-09-06 22:05:29.000000000 +0200
478 +++ ./Glide64/rdp.cpp 2013-09-13 22:23:52.000000000 +0200
480 extern FrameSkipper frameSkipper;
488 const int NumOfFormats = 3;
489 SCREEN_SHOT_FORMAT ScreenShotFormats[NumOfFormats] = { {wxT("BMP"), wxT("bmp"), wxBITMAP_TYPE_BMP}, {wxT("PNG"), wxT("png"), wxBITMAP_TYPE_PNG}, {wxT("JPEG"), wxT("jpeg"), wxBITMAP_TYPE_JPEG} };
490 @@ -633,18 +637,21 @@
492 EXPORT void CALL ProcessDList(void)
494 - SoftLocker lock(mutexProcessDList);
495 +// SoftLocker lock(mutexProcessDList);
497 - if (frameSkipper.willSkipNext() || !lock.IsOk()) //mutex is busy
498 + if (frameSkipper.willSkipNext() /*|| !lock.IsOk()*/) //mutex is busy
500 - if (!lock.IsOk()) //mutex is busy
501 + if (/*!lock.IsOk()*/0) //mutex is busy
504 +// printf("Frameskip, reason=%s\n", (lock.IsOk())?"lock":"frameskip");
506 drawNoFullscreenMessage();
507 // Set an interrupt to allow the game to continue
508 *gfx.MI_INTR_REG |= 0x20;
509 gfx.CheckInterrupts();
510 + *gfx.MI_INTR_REG |= 0x01;
511 + gfx.CheckInterrupts();
522 + if (frameSkipper.willSkipNext())
524 + *gfx.MI_INTR_REG |= 0x20;
525 + gfx.CheckInterrupts();
526 + *gfx.MI_INTR_REG |= 0x01;
527 + gfx.CheckInterrupts();
533 if (settings.swapmode > 0)
536 rdp.pc[rdp.pc_i] = (a+8) & BMASK;
539 - perf_cur = wxDateTime::UNow();
540 + perf_cur = ticksGetTicks();
542 // Process this instruction
543 gfx_instruction[settings.ucode][rdp.cmd0>>24] ();
548 - perf_next = wxDateTime::UNow();
549 - sprintf (out_buf, "perf %08lx: %016I64d\n", a-8, (perf_next-perf_cur).Format(_T("%l")).mb_str());
550 + perf_next = ticksGetTicks();
551 + sprintf (out_buf, "perf %08x: %lli\n", a-8, (perf_next-perf_cur));
560 diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/Util.cpp ./Glide64/Util.cpp
561 --- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glide64/Util.cpp 2013-09-06 22:05:29.000000000 +0200
562 +++ ./Glide64/Util.cpp 2013-09-08 12:39:52.000000000 +0200
563 @@ -289,29 +289,29 @@
565 if (linew == 0 && (fb_depth_render_enabled || (rdp.rm & 0xC00) == 0xC00))
567 - double X0 = vtx[0]->sx / rdp.scale_x;
568 - double Y0 = vtx[0]->sy / rdp.scale_y;
569 - double X1 = vtx[1]->sx / rdp.scale_x;
570 - double Y1 = vtx[1]->sy / rdp.scale_y;
571 - double X2 = vtx[2]->sx / rdp.scale_x;
572 - double Y2 = vtx[2]->sy / rdp.scale_y;
573 - double diffy_02 = Y0 - Y2;
574 - double diffy_12 = Y1 - Y2;
575 - double diffx_02 = X0 - X2;
576 - double diffx_12 = X1 - X2;
578 - double denom = (diffx_02 * diffy_12 - diffx_12 * diffy_02);
579 - if(denom*denom > 0.0)
581 - double diffz_02 = vtx[0]->sz - vtx[2]->sz;
582 - double diffz_12 = vtx[1]->sz - vtx[2]->sz;
583 - double fdzdx = (diffz_02 * diffy_12 - diffz_12 * diffy_02) / denom;
584 + float X0 = vtx[0]->sx / rdp.scale_x;
585 + float Y0 = vtx[0]->sy / rdp.scale_y;
586 + float X1 = vtx[1]->sx / rdp.scale_x;
587 + float Y1 = vtx[1]->sy / rdp.scale_y;
588 + float X2 = vtx[2]->sx / rdp.scale_x;
589 + float Y2 = vtx[2]->sy / rdp.scale_y;
590 + float diffy_02 = Y0 - Y2;
591 + float diffy_12 = Y1 - Y2;
592 + float diffx_02 = X0 - X2;
593 + float diffx_12 = X1 - X2;
595 + float denom = (diffx_02 * diffy_12 - diffx_12 * diffy_02);
596 + if(denom*denom > 0.0f)
598 + float diffz_02 = vtx[0]->sz - vtx[2]->sz;
599 + float diffz_12 = vtx[1]->sz - vtx[2]->sz;
600 + float fdzdx = (diffz_02 * diffy_12 - diffz_12 * diffy_02) / denom;
601 if ((rdp.rm & 0xC00) == 0xC00) {
602 // Calculate deltaZ per polygon for Decal z-mode
603 - double fdzdy = (diffz_02 * diffx_12 - diffz_12 * diffx_02) / denom;
604 - double fdz = fabs(fdzdx) + fabs(fdzdy);
605 + float fdzdy = (diffz_02 * diffx_12 - diffz_12 * diffx_02) / denom;
606 + float fdz = fabs(fdzdx) + fabs(fdzdy);
607 if ((settings.hacks & hack_Zelda) && (rdp.rm & 0x800))
608 - fdz *= 4.0; // Decal mode in Zelda sometimes needs mutiplied deltaZ to work correct, e.g. roads
609 + fdz *= 4.0f; // Decal mode in Zelda sometimes needs mutiplied deltaZ to work correct, e.g. roads
610 deltaZ = max(8, (int)fdz);
612 dzdx = (int)(fdzdx * 65536.0);
613 @@ -881,12 +881,12 @@
620 + float d; //*SEB* was doubles
625 -static double EvaLine(LineEuqationType &li, double x, double y)
626 +static float EvaLine(LineEuqationType &li, float x, float y) //*SEB* all double before
628 return li.x*x+li.y*y+li.d;
634 -__inline double interp3p(float a, float b, float c, double r1, double r2)
635 +__inline float interp3p(float a, float b, float c, float r1, float r2) //*SEB* r1 and r2 and function was double
637 return (a)+(((b)+((c)-(b))*(r2))-(a))*(r1);
639 @@ -915,34 +915,34 @@
640 (a+(((b)+((c)-(b))*(r2))-(a))*(r1))
643 -static void InterpolateColors3(VERTEX &v1, VERTEX &v2, VERTEX &v3, VERTEX &out)
644 +static void InterpolateColors3(VERTEX &v1, VERTEX &v2, VERTEX &v3, VERTEX &out) //*SEB* all double before
647 LineEuqationType line;
648 Create1LineEq(line, v2, v3, v1);
650 - double aDot = (out.x*line.x + out.y*line.y);
651 - double bDot = (v1.sx*line.x + v1.sy*line.y);
652 + float aDot = (out.x*line.x + out.y*line.y);
653 + float bDot = (v1.sx*line.x + v1.sy*line.y);
655 - double scale1 = ( - line.d - aDot) / ( bDot - aDot );
656 + float scale1 = ( - line.d - aDot) / ( bDot - aDot );
658 - double tx = out.x + scale1 * (v1.sx - out.x);
659 - double ty = out.y + scale1 * (v1.sy - out.y);
660 + float tx = out.x + scale1 * (v1.sx - out.x);
661 + float ty = out.y + scale1 * (v1.sy - out.y);
663 - double s1 = 101.0, s2 = 101.0;
664 - double den = tx - v1.sx;
665 - if (fabs(den) > 1.0)
666 + float s1 = 101.0, s2 = 101.0;
667 + float den = tx - v1.sx;
668 + if (fabsf(den) > 1.0)
669 s1 = (out.x-v1.sx)/den;
671 s1 = (out.y-v1.sy)/(ty-v1.sy);
674 - if (fabs(den) > 1.0)
675 + if (fabsf(den) > 1.0)
678 s2 =(ty-v2.sy)/(v3.sy-v2.sy);
680 - double w = 1.0/interp3p(v1.oow,v2.oow,v3.oow,s1,s2);
681 + float w = 1.0/interp3p(v1.oow,v2.oow,v3.oow,s1,s2);
683 out.r = real_to_char(interp3p(v1.r*v1.oow,v2.r*v2.oow,v3.r*v3.oow,s1,s2)*w);
684 out.g = real_to_char(interp3p(v1.g*v1.oow,v2.g*v2.oow,v3.g*v3.oow,s1,s2)*w);
687 float deltaS, deltaT;
688 float deltaX, deltaY;
689 - double deltaTexels, deltaPixels, lodFactor = 0;
691 + float deltaTexels, deltaPixels, lodFactor = 0; //*SEB* double before
692 + float intptr; //*SEB* double before
693 float s_scale = rdp.tiles[rdp.cur_tile].width / 255.0f;
694 float t_scale = rdp.tiles[rdp.cur_tile].height / 255.0f;
695 if (settings.lodmode == 1)
696 @@ -1019,7 +1019,7 @@
697 float lod_fraction = 1.0f;
698 if (lod_tile < rdp.cur_tile + rdp.mipmap_level)
700 - lod_fraction = max((float)modf(lodFactor / pow(2.,lod_tile),&intptr), rdp.prim_lodmin / 255.0f);
701 + lod_fraction = max((float)modff(lodFactor / powf(2.,lod_tile),&intptr), (float)rdp.prim_lodmin / 255.0f);
704 if (cmb.dc0_detailmax < 0.5f)
705 diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/GlideHQ/TxDbg.cpp ./GlideHQ/TxDbg.cpp
706 --- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/GlideHQ/TxDbg.cpp 2013-09-06 22:05:30.000000000 +0200
707 +++ ./GlideHQ/TxDbg.cpp 2013-09-07 12:06:11.000000000 +0200
712 +#define _GLIBCXX_HAVE_BROKEN_VSWPRINTF 1
717 diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glitch64/combiner.cpp ./Glitch64/combiner.cpp
718 --- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glitch64/combiner.cpp 2013-09-06 22:05:30.000000000 +0200
719 +++ ./Glitch64/combiner.cpp 2013-09-14 10:16:36.000000000 +0200
728 static int fct[4], source0[4], operand0[4], source1[4], operand1[4], source2[4], operand2[4];
729 @@ -117,10 +119,11 @@
730 // using gl_FragCoord is terribly slow on ATI and varying variables don't work for some unknown
731 // reason, so we use the unused components of the texture2 coordinates
732 static const char* fragment_shader_dither =
733 -" float dithx = (gl_TexCoord[2].b + 1.0)*0.5*1000.0; \n"
735 +/*" float dithx = (gl_TexCoord[2].b + 1.0)*0.5*1000.0; \n"
736 " float dithy = (gl_TexCoord[2].a + 1.0)*0.5*1000.0; \n"
737 " if(texture2D(ditherTex, vec2((dithx-32.0*floor(dithx/32.0))/32.0, \n"
738 -" (dithy-32.0*floor(dithy/32.0))/32.0)).a > 0.5) discard; \n"
739 +" (dithy-32.0*floor(dithy/32.0))/32.0)).a > 0.5) discard; \n"*/
742 static const char* fragment_shader_default =
743 @@ -165,11 +168,16 @@
747 +static const char* fragment_shader_alt_end =
752 static const char* vertex_shader =
754 "#define Z_MAX 65536.0 \n"
755 "attribute highp vec4 aVertex; \n"
756 -"attribute highp vec4 aColor; \n"
757 +"attribute mediump vec4 aColor; \n" //*SEB* highp -> mediump
758 "attribute highp vec4 aMultiTexCoord0; \n"
759 "attribute highp vec4 aMultiTexCoord1; \n"
760 "attribute float aFog; \n"
763 // creating a fake texture
764 glBindTexture(GL_TEXTURE_2D, default_texture);
765 - glTexImage2D(GL_TEXTURE_2D, 0, 3, 2, 2, 0, GL_RGBA, GL_UNSIGNED_BYTE, texture);
766 + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 2, 2, 0, GL_RGBA, GL_UNSIGNED_BYTE, texture);
767 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
768 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
771 strlen(fragment_shader_end)+1);
772 strcpy(fragment_shader, fragment_shader_header);
773 strcat(fragment_shader, fragment_shader_default);
774 - strcat(fragment_shader, fragment_shader_end);
775 + strcat(fragment_shader, fragment_shader_end); /*SEB*/
776 glShaderSource(fragment_shader_object, 1, (const GLchar**)&fragment_shader, NULL);
777 free(fragment_shader);
783 + int alpha_test; //*SEB*
784 GLuint fragment_shader_object;
785 GLuint program_object;
786 int texture0_location;
789 int chroma_color_location;
797 prog.texture0_combinera == texture0_combinera_key &&
798 prog.texture1_combinera == texture1_combinera_key &&
799 prog.fog_enabled == fog_enabled &&
800 + prog.alpha_test == alpha_test && //*SEB*
801 prog.chroma_enabled == chroma_enabled &&
802 prog.dither_enabled == dither_enabled &&
803 prog.blackandwhite0 == blackandwhite0 &&
804 @@ -514,11 +526,13 @@
808 - if(shader_programs != NULL)
809 - shader_programs = (shader_program_key*)realloc(shader_programs, (number_of_programs+1)*sizeof(shader_program_key));
810 + if(shader_programs != NULL) {
811 + if ((number_of_programs+1)>1024)
812 + shader_programs = (shader_program_key*)realloc(shader_programs, (number_of_programs+1)*sizeof(shader_program_key));
815 - shader_programs = (shader_program_key*)malloc(sizeof(shader_program_key));
816 - //printf("number of shaders %d\n", number_of_programs);
817 + shader_programs = (shader_program_key*)malloc(sizeof(shader_program_key)*1024);
818 + //printf("number of shaders %d\n", number_of_programs);
820 shader_programs[number_of_programs].color_combiner = color_combiner_key;
821 shader_programs[number_of_programs].alpha_combiner = alpha_combiner_key;
823 shader_programs[number_of_programs].dither_enabled = dither_enabled;
824 shader_programs[number_of_programs].blackandwhite0 = blackandwhite0;
825 shader_programs[number_of_programs].blackandwhite1 = blackandwhite1;
826 + shader_programs[number_of_programs].alpha_test = alpha_test; //*SEB*
831 strcat(fragment_shader, fragment_shader_color_combiner);
832 strcat(fragment_shader, fragment_shader_alpha_combiner);
833 if(fog_enabled) strcat(fragment_shader, fragment_shader_fog);
834 - strcat(fragment_shader, fragment_shader_end);
836 + strcat(fragment_shader, fragment_shader_end);
838 + strcat(fragment_shader, fragment_shader_alt_end); //*SEB*
839 if(chroma_enabled) strcat(fragment_shader, fragment_shader_chroma);
841 shader_programs[number_of_programs].fragment_shader_object = glCreateShader(GL_FRAGMENT_SHADER);
842 @@ -1719,7 +1737,7 @@
843 glActiveTexture(GL_TEXTURE2);
844 glEnable(GL_TEXTURE_2D);
845 glBindTexture(GL_TEXTURE_2D, 33*1024*1024);
846 - glTexImage2D(GL_TEXTURE_2D, 0, 4, 32, 32, 0, GL_RGBA, GL_UNSIGNED_BYTE, texture);
847 + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 32, 32, 0, GL_RGBA, GL_UNSIGNED_BYTE, texture);
848 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
849 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
850 glDisable(GL_TEXTURE_2D);
851 diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glitch64/geometry.cpp ./Glitch64/geometry.cpp
852 --- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glitch64/geometry.cpp 2013-09-06 22:05:30.000000000 +0200
853 +++ ./Glitch64/geometry.cpp 2013-09-12 22:13:33.000000000 +0200
855 #define VERTEX_SIZE sizeof(VERTEX) //Size of vertex struct
858 -#include "ae_bridge.h"
859 +//#include "ae_bridge.h"
860 static float polygonOffsetFactor;
861 static float polygonOffsetUnits;
864 void FindBestDepthBias()
867 - int hardwareType = Android_JNI_GetHardwareType();
868 - Android_JNI_GetPolygonOffset(hardwareType, 1, &polygonOffsetFactor, &polygonOffsetUnits);
869 +/* int hardwareType = Android_JNI_GetHardwareType();
870 + Android_JNI_GetPolygonOffset(hardwareType, 1, &polygonOffsetFactor, &polygonOffsetUnits);*/
871 +// glPolygonOffset(0.2f, 0.2f);
872 + polygonOffsetFactor=0.2f;
873 + polygonOffsetUnits=0.2f;
875 float f, bestz = 0.25f;
881 - glPolygonOffset(polygonOffsetFactor, polygonOffsetUnits);
882 +// glPolygonOffset(polygonOffsetFactor, polygonOffsetUnits);
884 + glPolygonOffset(1.0f, -(float)level*polygonOffsetUnits);
886 + glPolygonOffset(0, (float)level*3.0f);
889 glPolygonOffset(1.0f, -(float)level*zscale/255.0f);
890 @@ -408,13 +415,13 @@
891 grDrawTriangle( const void *a, const void *b, const void *c )
893 LOG("grDrawTriangle()\r\n\t");
896 if(nvidia_viewport_hack && !render_to_texture)
898 glViewport(0, viewport_offset, viewport_width, viewport_height);
899 nvidia_viewport_hack = 0;
905 if(need_to_compile) compile_shader();
906 @@ -588,13 +595,13 @@
908 void **pointers = (void**)pointers2;
909 LOG("grDrawVertexArray(%d,%d)\r\n", mode, Count);
912 if(nvidia_viewport_hack && !render_to_texture)
914 glViewport(0, viewport_offset, viewport_width, viewport_height);
915 nvidia_viewport_hack = 0;
921 if(need_to_compile) compile_shader();
922 @@ -612,13 +619,13 @@
923 grDrawVertexArrayContiguous(FxU32 mode, FxU32 Count, void *pointers, FxU32 stride)
925 LOG("grDrawVertexArrayContiguous(%d,%d,%d)\r\n", mode, Count, stride);
928 if(nvidia_viewport_hack && !render_to_texture)
930 glViewport(0, viewport_offset, viewport_width, viewport_height);
931 nvidia_viewport_hack = 0;
937 LOGINFO("Incompatible stride\n");
938 diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glitch64/glitchmain.cpp ./Glitch64/glitchmain.cpp
939 --- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glitch64/glitchmain.cpp 2013-09-06 22:05:30.000000000 +0200
940 +++ ./Glitch64/glitchmain.cpp 2013-09-15 17:13:49.000000000 +0200
943 glCompressedTexImage2DARB = (PFNGLCOMPRESSEDTEXIMAGE2DPROC)wglGetProcAddress("glCompressedTexImage2DARB");
946 + glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
947 + glPixelStorei(GL_PACK_ALIGNMENT, 1);
956 //SDL_QuitSubSystem(SDL_INIT_VIDEO);
961 static int fbs_init = 0;
963 - //printf("grTextureBufferExt(%d, %d, %d, %d, %d, %d, %d)\r\n", tmu, startAddress, lodmin, lodmax, aspect, fmt, evenOdd);
964 + //printf("grTextureBufferExt(%d, %d, %d, %d, %d, %d, %d)\r\n", tmu, startAddress, lodmin, lodmax, aspect, fmt, evenOdd);
965 LOG("grTextureBufferExt(%d, %d, %d, %d %d, %d, %d)\r\n", tmu, startAddress, lodmin, lodmax, aspect, fmt, evenOdd);
966 if (lodmin != lodmax) display_warning("grTextureBufferExt : loading more than one LOD");
969 tmu_usage[rtmu].min = pBufferAddress;
970 if ((unsigned int) tmu_usage[rtmu].max < pBufferAddress+size)
971 tmu_usage[rtmu].max = pBufferAddress+size;
972 - // printf("tmu %d usage now %gMb - %gMb\n",
973 - // rtmu, tmu_usage[rtmu].min/1024.0f, tmu_usage[rtmu].max/1024.0f);
974 + //printf("tmu %d usage now %gMb - %gMb\n",
975 + // rtmu, tmu_usage[rtmu].min/1024.0f, tmu_usage[rtmu].max/1024.0f);
978 width = pBufferWidth;
979 @@ -927,14 +931,14 @@
980 texbufs[i].fmt = fmt;
982 texbuf_i = (texbuf_i+1)&(NB_TEXBUFS-1);
983 - //printf("texbuf %x fmt %x\n", pBufferAddress, fmt);
984 + //printf("texbuf %x fmt %x\n", pBufferAddress, fmt);
986 // ZIGGY it speeds things up to not delete the buffers
987 // a better thing would be to delete them *sometimes*
988 // remove_tex(pBufferAddress+1, pBufferAddress + size);
989 add_tex(pBufferAddress);
991 - //printf("viewport %dx%d\n", width, height);
992 + //printf("viewport %dx%d\n", width, height);
993 if (height > screen_height) {
994 glViewport( 0, viewport_offset + screen_height - height, width, height);
996 @@ -1009,7 +1013,6 @@
1001 remove_tex(pBufferAddress, pBufferAddress + width*height*2/*grTexFormatSize(fmt)*/);
1003 glGenFramebuffers( 1, &(fbs[nb_fb].fbid) );
1004 @@ -1768,6 +1771,7 @@
1007 LOG("grLfbLock(%d,%d,%d,%d,%d)\r\n", type, buffer, writeMode, origin, pixelPipeline);
1008 +//printf("grLfbLock(%d,%d,%d,%d,%d)\r\n", type, buffer, writeMode, origin, pixelPipeline);
1009 if (type == GR_LFB_WRITE_ONLY)
1011 display_warning("grLfbLock : write only");
1012 @@ -1792,12 +1796,32 @@
1013 if(buffer != GR_BUFFER_AUXBUFFER)
1015 if (writeMode == GR_LFBWRITEMODE_888) {
1017 + buf = (unsigned char*)malloc(width*height*4);
1018 //printf("LfbLock GR_LFBWRITEMODE_888\n");
1019 info->lfbPtr = frameBuffer;
1020 info->strideInBytes = width*4;
1021 info->writeMode = GR_LFBWRITEMODE_888;
1022 info->origin = origin;
1023 //glReadPixels(0, viewport_offset, width, height, GL_BGRA, GL_UNSIGNED_BYTE, frameBuffer);
1024 + glReadPixels(0, viewport_offset, width, height, GL_RGBA, GL_UNSIGNED_BYTE, buf);
1027 + unsigned char *p=buf;
1028 + for (j=0; j<height; j++)
1030 + short unsigned int *f=frameBuffer+(height-j-1)*width;
1031 + for (i=0; i<width; i++)
1043 buf = (unsigned char*)malloc(width*height*4);
1045 @@ -1807,14 +1831,22 @@
1046 info->origin = origin;
1047 glReadPixels(0, viewport_offset, width, height, GL_RGBA, GL_UNSIGNED_BYTE, buf);
1050 + unsigned char *p=buf;
1051 for (j=0; j<height; j++)
1053 + short unsigned int *f=frameBuffer+(height-j-1)*width;
1054 for (i=0; i<width; i++)
1056 - frameBuffer[(height-j-1)*width+i] =
1057 +/* frameBuffer[(height-j-1)*width+i] =
1058 ((buf[j*width*4+i*4+0] >> 3) << 11) |
1059 ((buf[j*width*4+i*4+1] >> 2) << 5) |
1060 - (buf[j*width*4+i*4+2] >> 3);
1061 + (buf[j*width*4+i*4+2] >> 3);*/
1063 + ((*(p) >> 3) << 11) |
1064 + ((*(p+1) >> 2) << 5) |
1070 @@ -1826,6 +1858,7 @@
1071 info->strideInBytes = width*2;
1072 info->writeMode = GR_LFBWRITEMODE_ZA16;
1073 info->origin = origin;
1074 + //*SEB* *TODO* check alignment
1075 glReadPixels(0, viewport_offset, width, height, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, depthBuffer);
1078 @@ -1855,6 +1888,7 @@
1079 unsigned short *frameBuffer = (unsigned short*)dst_data;
1080 unsigned short *depthBuffer = (unsigned short*)dst_data;
1081 LOG("grLfbReadRegion(%d,%d,%d,%d,%d,%d)\r\n", src_buffer, src_x, src_y, src_width, src_height, dst_stride);
1082 +//printf("grLfbReadRegion(%d,%d,%d,%d,%d,%d)\r\n", src_buffer, src_x, src_y, src_width, src_height, dst_stride);
1086 @@ -1876,15 +1910,22 @@
1087 buf = (unsigned char*)malloc(src_width*src_height*4);
1089 glReadPixels(src_x, (viewport_offset)+height-src_y-src_height, src_width, src_height, GL_RGBA, GL_UNSIGNED_BYTE, buf);
1091 for (j=0; j<src_height; j++)
1094 + unsigned char *p=buf+(src_height-j-1)*src_width*4;
1095 + unsigned short *f=frameBuffer+(j*dst_stride/2);
1096 for (i=0; i<src_width; i++)
1098 - frameBuffer[j*(dst_stride/2)+i] =
1099 +/* frameBuffer[j*(dst_stride/2)+i] =
1100 ((buf[(src_height-j-1)*src_width*4+i*4+0] >> 3) << 11) |
1101 ((buf[(src_height-j-1)*src_width*4+i*4+1] >> 2) << 5) |
1102 - (buf[(src_height-j-1)*src_width*4+i*4+2] >> 3);
1103 + (buf[(src_height-j-1)*src_width*4+i*4+2] >> 3);*/
1105 + ((*(p) >> 3) << 11) |
1106 + ((*(p+1) >> 2) << 5) |
1112 @@ -1892,15 +1933,19 @@
1115 buf = (unsigned char*)malloc(src_width*src_height*2);
1117 - glReadPixels(src_x, (viewport_offset)+height-src_y-src_height, src_width, src_height, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, depthBuffer);
1118 +//*SEB* read into buf, not depthBuffer.
1119 + glReadPixels(src_x, (viewport_offset)+height-src_y-src_height, src_width, src_height, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, buf);
1121 for (j=0;j<src_height; j++)
1124 + unsigned short *d=depthBuffer+j*dst_stride/2;
1125 + unsigned short *p=(unsigned short*)buf+(src_height-j-1)*src_width; //original looks fishy: why *4???
1126 for (i=0; i<src_width; i++)
1128 - depthBuffer[j*(dst_stride/2)+i] =
1129 - ((unsigned short*)buf)[(src_height-j-1)*src_width*4+i*4];
1130 +/* depthBuffer[j*(dst_stride/2)+i] =
1131 + ((unsigned short*)buf)[(src_height-j-1)*src_width*4+i*4];*/
1132 + *(d++) = *(p++); //why *4 in the original (prob. GL_PACK_ALIGNMENT was 4)? Indexed as unsigned short, that makes a *8 byte offset???
1136 @@ -1923,6 +1968,7 @@
1138 unsigned int tex_width = 1, tex_height = 1;
1139 LOG("grLfbWriteRegion(%d,%d,%d,%d,%d,%d,%d,%d)\r\n",dst_buffer, dst_x, dst_y, src_format, src_width, src_height, pixelPipeline, src_stride);
1140 +//printf("grLfbWriteRegion(%d,%d,%d,%d,%d,%d,%d,%d)\r\n",dst_buffer, dst_x, dst_y, src_format, src_width, src_height, pixelPipeline, src_stride);
1142 //glPushAttrib(GL_ALL_ATTRIB_BITS);
1144 @@ -1949,6 +1995,12 @@
1145 glActiveTexture(texture_number);
1147 const unsigned int half_stride = src_stride / 2;
1149 + const int comp_stride = half_stride - src_width;
1150 + const int comp_tex = (tex_width - src_width)*4;
1151 + unsigned short *f=frameBuffer;
1152 + unsigned char *p=buf;
1156 case GR_LFB_SRC_FMT_1555:
1157 @@ -1956,12 +2008,20 @@
1159 for (i=0; i<src_width; i++)
1161 - const unsigned int col = frameBuffer[j*half_stride+i];
1162 +/* const unsigned int col = frameBuffer[j*half_stride+i];
1163 buf[j*tex_width*4+i*4+0]=((col>>10)&0x1F)<<3;
1164 buf[j*tex_width*4+i*4+1]=((col>>5)&0x1F)<<3;
1165 buf[j*tex_width*4+i*4+2]=((col>>0)&0x1F)<<3;
1166 - buf[j*tex_width*4+i*4+3]= (col>>15) ? 0xFF : 0;
1167 + buf[j*tex_width*4+i*4+3]= (col>>15) ? 0xFF : 0;*/
1168 + const unsigned int col = *(f++);
1169 + *(p)=((col>>10)&0x1F)<<3;
1170 + *(p+1)=((col>>5)&0x1F)<<3;
1171 + *(p+2)=((col>>0)&0x1F)<<3;
1172 + *(p+3)= (col>>15) ? 0xFF : 0;
1179 case GR_LFBWRITEMODE_555:
1180 @@ -1969,12 +2029,20 @@
1182 for (i=0; i<src_width; i++)
1184 - const unsigned int col = frameBuffer[j*half_stride+i];
1185 +/* const unsigned int col = frameBuffer[j*half_stride+i];
1186 buf[j*tex_width*4+i*4+0]=((col>>10)&0x1F)<<3;
1187 buf[j*tex_width*4+i*4+1]=((col>>5)&0x1F)<<3;
1188 buf[j*tex_width*4+i*4+2]=((col>>0)&0x1F)<<3;
1189 - buf[j*tex_width*4+i*4+3]=0xFF;
1190 + buf[j*tex_width*4+i*4+3]=0xFF;*/
1191 + const unsigned int col = *(f++);
1192 + *(p)=((col>>10)&0x1F)<<3;
1193 + *(p+1)=((col>>5)&0x1F)<<3;
1194 + *(p+2)=((col>>0)&0x1F)<<3;
1202 case GR_LFBWRITEMODE_565:
1203 @@ -1982,12 +2050,20 @@
1205 for (i=0; i<src_width; i++)
1207 - const unsigned int col = frameBuffer[j*half_stride+i];
1208 +/* const unsigned int col = frameBuffer[j*half_stride+i];
1209 buf[j*tex_width*4+i*4+0]=((col>>11)&0x1F)<<3;
1210 buf[j*tex_width*4+i*4+1]=((col>>5)&0x3F)<<2;
1211 buf[j*tex_width*4+i*4+2]=((col>>0)&0x1F)<<3;
1212 - buf[j*tex_width*4+i*4+3]=0xFF;
1213 + buf[j*tex_width*4+i*4+3]=0xFF;*/
1214 + const unsigned int col = *(f++);
1215 + *(p)=((col>>11)&0x1F)<<3;
1216 + *(p+1)=((col>>5)&0x3F)<<2;
1217 + *(p+2)=((col>>0)&0x1F)<<3;
1226 @@ -2006,7 +2082,7 @@
1229 glBindTexture(GL_TEXTURE_2D, default_texture);
1230 - glTexImage2D(GL_TEXTURE_2D, 0, 4, tex_width, tex_height, 0, GL_RGBA, GL_UNSIGNED_BYTE, buf);
1231 + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex_width, tex_height, 0, GL_RGBA, GL_UNSIGNED_BYTE, buf);
1235 diff -Naur ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glitch64/textures.cpp ./Glitch64/textures.cpp
1236 --- ../../../../git/mupen64plus-ae/jni/gles2glide64/src/Glitch64/textures.cpp 2013-09-06 22:05:31.000000000 +0200
1237 +++ ./Glitch64/textures.cpp 2013-09-13 11:32:50.000000000 +0200
1242 +#include <string.h>
1244 /* Napalm extensions to GrTextureFormat_t */
1245 #define GR_TEXFMT_ARGB_CMP_FXT1 0x11
1248 glDeleteTextures(n, t);
1250 - //printf("RMVTEX nbtex is now %d (%06x - %06x)\n", nbTex, idmin, idmax);
1251 +//printf("RMVTEX nbtex is now %d (%06x - %06x)\n", nbTex, idmin, idmax);
1257 texlist *aux = list;
1259 - //printf("ADDTEX nbtex is now %d (%06x)\n", nbTex, id);
1260 +//printf("ADDTEX nbtex is now %d (%06x)\n", nbTex, id);
1261 if (list == NULL || id < list->id)
1264 @@ -435,8 +436,11 @@
1267 factor = grTexFormat2GLPackedFmt(info->format, &gltexfmt, &glpixfmt, &glpackfmt);
1269 +//printf("grTexDownloadMipmap, id=%x, size=%ix%i, format=%x\n", startAddress+1, width, height, info->format);
1271 + gltexfmt = GL_RGBA;
1272 + glpixfmt = GL_RGBA;
1273 + glpackfmt = GL_UNSIGNED_BYTE;
1275 // VP fixed the texture conversions to be more accurate, also swapped
1276 // the for i/j loops so that is is less likely to break the memory cache
1278 switch(info->format)
1280 case GR_TEXFMT_ALPHA_8:
1281 - for (i=0; i<height; i++)
1282 + /* for (i=0; i<height; i++)
1284 for (j=0; j<width; j++)
1286 @@ -457,10 +461,25 @@
1290 - glformat = GL_RGBA;
1291 + glformat = GL_RGBA;*/
1293 + for (i=0; i<height; i++)
1295 + for (j=0; j<width; j++)
1297 + unsigned short texel = (unsigned short)((unsigned char*)info->data)[m];
1298 + ((unsigned short*)texture)[n] = texel|(texel<<8);
1304 + glformat = gltexfmt = glpixfmt = GL_LUMINANCE_ALPHA;
1305 + glpackfmt = GL_UNSIGNED_BYTE;
1308 case GR_TEXFMT_INTENSITY_8: // I8 support - H.Morii
1309 - for (i=0; i<height; i++)
1310 +/* for (i=0; i<height; i++)
1312 for (j=0; j<width; j++)
1314 @@ -470,9 +489,13 @@
1321 +// glformat = GL_ALPHA;
1322 + memcpy(texture, info->data, width*height);
1323 + glformat = gltexfmt = glpixfmt = GL_LUMINANCE;
1324 + glpackfmt = GL_UNSIGNED_BYTE;
1326 - glformat = GL_ALPHA;
1328 case GR_TEXFMT_ALPHA_INTENSITY_44:
1332 for (j=0; j<width; j++)
1334 - unsigned int texel = (unsigned int)((unsigned char*)info->data)[m];
1335 +/* unsigned int texel = (unsigned int)((unsigned char*)info->data)[m];
1337 - /* accurate conversion */
1338 + // accurate conversion
1339 unsigned int texel_hi = (texel & 0x000000F0) << 20;
1340 unsigned int texel_low = texel & 0x0000000F;
1341 texel_low |= (texel_low << 4);
1342 @@ -493,61 +516,90 @@
1343 texel_hi |= ((texel_low << 16) | (texel_low << 8) | texel_low);
1345 ((unsigned int*)texture)[n] = texel_hi;
1347 + unsigned char texel = ((unsigned char*)info->data)[m];
1348 + unsigned short texel_hi = (texel & 0x000000F0) << 4;
1349 + unsigned short texel_low = texel & 0x0000000F;
1350 + texel_low |= (texel_low << 4);
1351 + texel_hi |= ((texel_hi << 4) | (texel_low));
1352 + ((unsigned short*)texture)[n] = texel_hi;
1358 - glformat = GL_LUMINANCE_ALPHA;
1359 + glformat = gltexfmt = glpixfmt = GL_LUMINANCE_ALPHA;
1360 + glpackfmt = GL_UNSIGNED_BYTE;
1361 +// glformat = GL_LUMINANCE_ALPHA;
1364 case GR_TEXFMT_RGB_565:
1365 - for (i=0; i<height; i++)
1366 +/* for (i=0; i<height; i++)
1368 for (j=0; j<width; j++)
1370 - unsigned int texel = (unsigned int)((unsigned short*)info->data)[m];
1372 +/* unsigned int texel = (unsigned int)((unsigned short*)info->data)[m];
1373 unsigned int B = texel & 0x0000F800;
1374 unsigned int G = texel & 0x000007E0;
1375 unsigned int R = texel & 0x0000001F;
1377 - /* accurate conversion */
1378 + // accurate conversion
1379 ((unsigned int*)texture)[n] = 0xFF000000 | (R << 19) | ((R >> 2) << 16) | (G << 5) | ((G >> 9) << 8) | (B >> 8) | (B >> 13);
1381 ((unsigned int*)texture)[n] = 0xFF000000 | (R << 19) | (G << 5) | (B >> 8);
1384 +/* const unsigned short texel = ((unsigned short*)info->data)[m];
1385 + const unsigned short B = (texel & 0xF800)>>11;
1386 + const unsigned short G = texel & 0x07E0;
1387 + const unsigned short R = (texel & 0x001F)<<11;
1388 + ((unsigned short*)texture)[n] = R|G|B;
1394 + memcpy(texture, info->data, width*height*2);
1396 - glformat = GL_RGB;
1397 +// glformat = GL_RGB;
1398 + glformat = gltexfmt = glpixfmt = GL_RGB;
1399 + glpackfmt = GL_UNSIGNED_SHORT_5_6_5;
1401 case GR_TEXFMT_ARGB_1555:
1402 for (i=0; i<height; i++)
1404 for (j=0; j<width; j++)
1406 - unsigned int texel = (unsigned int)((unsigned short*)info->data)[m];
1407 +/* unsigned int texel = (unsigned int)((unsigned short*)info->data)[m];
1408 unsigned int A = texel & 0x00008000 ? 0xFF000000 : 0;
1409 unsigned int B = texel & 0x00007C00;
1410 unsigned int G = texel & 0x000003E0;
1411 unsigned int R = texel & 0x0000001F;
1413 - /* accurate conversion */
1414 + // accurate conversion
1415 ((unsigned int*)texture)[n] = A | (R << 19) | ((R >> 2) << 16) | (G << 6) | ((G >> 8) << 8) | (B >> 7) | (B >> 12);
1417 ((unsigned int*)texture)[n] = A | (R << 19) | (G << 6) | (B >> 7);
1420 + unsigned short texel = ((unsigned short*)info->data)[m];
1421 + unsigned short A = (texel & 0x8000)>>15;
1422 + ((unsigned short*)texture)[n] = A|(texel&0x7fff)<<1;
1424 + unsigned short B = (texel & 0x7C00)>>9;
1425 + unsigned short G = texel & 0x03E0<<1;
1426 + unsigned short R = (texel & 0x001F)<<11;
1427 + ((unsigned short*)texture)[n] = A|R|G|B;*/
1433 - glformat = GL_RGBA;
1434 +// glformat = GL_RGBA;
1435 + glformat = gltexfmt = glpixfmt = GL_RGBA;
1436 + glpackfmt = GL_UNSIGNED_SHORT_5_5_5_1;
1438 case GR_TEXFMT_ALPHA_INTENSITY_88:
1439 - for (i=0; i<height; i++)
1440 +/* for (i=0; i<height; i++)
1442 for (j=0; j<width; j++)
1444 @@ -557,9 +609,12 @@
1450 + memcpy(texture, info->data, width*height*2);
1452 glformat = GL_LUMINANCE_ALPHA;
1453 + glformat = gltexfmt = glpixfmt = GL_LUMINANCE_ALPHA;
1454 + glpackfmt = GL_UNSIGNED_BYTE;
1456 case GR_TEXFMT_ARGB_4444:
1458 @@ -567,23 +622,29 @@
1460 for (j=0; j<width; j++)
1462 - unsigned int texel = (unsigned int)((unsigned short*)info->data)[m];
1463 +/* unsigned int texel = (unsigned int)((unsigned short*)info->data)[m];
1464 unsigned int A = texel & 0x0000F000;
1465 unsigned int B = texel & 0x00000F00;
1466 unsigned int G = texel & 0x000000F0;
1467 unsigned int R = texel & 0x0000000F;
1469 - /* accurate conversion */
1470 + // accurate conversion
1471 ((unsigned int*)texture)[n] = (A << 16) | (A << 12) | (R << 20) | (R << 16) | (G << 8) | (G << 4) | (B >> 4) | (B >> 8);
1473 ((unsigned int*)texture)[n] = (A << 16) | (R << 20) | (G << 8) | (B >> 4);
1476 + unsigned short texel = ((unsigned short*)info->data)[m];
1477 + unsigned int A = (texel & 0xF000)>>12;
1478 + ((unsigned short*)texture)[n] = A|(texel&0x0fff)<<4;
1485 + glformat = gltexfmt = glpixfmt = GL_RGBA;
1486 + glpackfmt = GL_UNSIGNED_SHORT_4_4_4_4;
1488 case GR_TEXFMT_ARGB_8888:
1489 for (i=0; i<height; i++)
1491 if (largest_supported_anisotropy > 1.0f)
1492 glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, largest_supported_anisotropy);
1494 - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, texture);
1495 +//*SEB* glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, texture);
1496 +//printf("new texture, id=%x, size=%ix%i, fmt=%x/%x\n", startAddress+1, width, height, gltexfmt, glpackfmt);
1497 + glTexImage2D(GL_TEXTURE_2D, 0, gltexfmt, width, height, 0, glpixfmt, glpackfmt, texture);
1499 switch(info->format)