GLES2N64 (from mupen64plus-ae) plugin. Compile and run on the OpenPandora
[mupen64plus-pandora.git] / source / gles2n64 / src / ShaderCombiner.cpp
1
2 #include <stdlib.h>
3 #include "OpenGL.h"
4 #include "ShaderCombiner.h"
5 #include "Common.h"
6 #include "Textures.h"
7 #include "Config.h"
8
9
10 //(sa - sb) * m + a
11 static const u32 saRGBExpanded[] =
12 {
13     COMBINED,           TEXEL0,             TEXEL1,             PRIMITIVE,
14     SHADE,              ENVIRONMENT,        ONE,                NOISE,
15     ZERO,               ZERO,               ZERO,               ZERO,
16     ZERO,               ZERO,               ZERO,               ZERO
17 };
18
19 static const u32 sbRGBExpanded[] =
20 {
21     COMBINED,           TEXEL0,             TEXEL1,             PRIMITIVE,
22     SHADE,              ENVIRONMENT,        CENTER,             K4,
23     ZERO,               ZERO,               ZERO,               ZERO,
24     ZERO,               ZERO,               ZERO,               ZERO
25 };
26
27 static const u32 mRGBExpanded[] =
28 {
29     COMBINED,           TEXEL0,             TEXEL1,             PRIMITIVE,
30     SHADE,              ENVIRONMENT,        SCALE,              COMBINED_ALPHA,
31     TEXEL0_ALPHA,       TEXEL1_ALPHA,       PRIMITIVE_ALPHA,    SHADE_ALPHA,
32     ENV_ALPHA,          LOD_FRACTION,       PRIM_LOD_FRAC,      K5,
33     ZERO,               ZERO,               ZERO,               ZERO,
34     ZERO,               ZERO,               ZERO,               ZERO,
35     ZERO,               ZERO,               ZERO,               ZERO,
36     ZERO,               ZERO,               ZERO,               ZERO
37 };
38
39 static const u32 aRGBExpanded[] =
40 {
41     COMBINED,           TEXEL0,             TEXEL1,             PRIMITIVE,
42     SHADE,              ENVIRONMENT,        ONE,                ZERO
43 };
44
45 static const u32 saAExpanded[] =
46 {
47     COMBINED,           TEXEL0_ALPHA,       TEXEL1_ALPHA,       PRIMITIVE_ALPHA,
48     SHADE_ALPHA,        ENV_ALPHA,          ONE,                ZERO
49 };
50
51 static const u32 sbAExpanded[] =
52 {
53     COMBINED,           TEXEL0_ALPHA,       TEXEL1_ALPHA,       PRIMITIVE_ALPHA,
54     SHADE_ALPHA,        ENV_ALPHA,          ONE,                ZERO
55 };
56
57 static const u32 mAExpanded[] =
58 {
59     LOD_FRACTION,       TEXEL0_ALPHA,       TEXEL1_ALPHA,       PRIMITIVE_ALPHA,
60     SHADE_ALPHA,        ENV_ALPHA,          PRIM_LOD_FRAC,      ZERO,
61 };
62
63 static const u32 aAExpanded[] =
64 {
65     COMBINED,           TEXEL0_ALPHA,       TEXEL1_ALPHA,       PRIMITIVE_ALPHA,
66     SHADE_ALPHA,        ENV_ALPHA,          ONE,                ZERO
67 };
68
// Encoding tables with 21 entries each, one set for the colour combiner
// (CC*) and one for the alpha combiner (AC*), one table per operand slot.
// NOTE(review): presumably indexed by combiner source constant and consumed
// by EncodeCombineMode -- neither is visible in this file, confirm there.
int CCEncodeA[] = {
     0,  1,  2,  3,  4,  5, 15,
    15, 15, 15, 15, 15, 15, 15,
    15, 15,  7, 15, 15,  6, 15
};
int CCEncodeB[] = {
     0,  1,  2,  3,  4,  5,  6,
    15, 15, 15, 15, 15, 15, 15,
    15, 15, 15,  7, 15, 15, 15
};
int CCEncodeC[] = {
     0,  1,  2,  3,  4,  5, 31,
     6,  7,  8,  9, 10, 11, 12,
    13, 14, 31, 31, 15, 31, 31
};
int CCEncodeD[] = {
     0,  1,  2,  3,  4,  5, 15,
    15, 15, 15, 15, 15, 15, 15,
    15, 15, 15, 15, 15,  6, 15
};
int ACEncodeA[] = {
    7, 7, 7, 7, 7, 7, 7,
    7, 0, 1, 2, 3, 4, 5,
    7, 7, 7, 7, 7, 6, 7
};
int ACEncodeB[] = {
    7, 7, 7, 7, 7, 7, 7,
    7, 0, 1, 2, 3, 4, 5,
    7, 7, 7, 7, 7, 6, 7
};
int ACEncodeC[] = {
    7, 7, 7, 7, 7, 7, 7,
    7, 0, 1, 2, 3, 4, 5,
    7, 6, 7, 7, 7, 7, 7
};
int ACEncodeD[] = {
    7, 7, 7, 7, 7, 7, 7,
    7, 0, 1, 2, 3, 4, 5,
    7, 7, 7, 7, 7, 6, 7
};
77
78 ShaderProgram *scProgramRoot = NULL;
79 ShaderProgram *scProgramCurrent = NULL;
80 int scProgramChanged = 0;
81 int scProgramCount = 0;
82
83 GLint _vertex_shader = 0;
84
// Common prologue of every generated fragment shader: uniform and varying
// declarations plus the opening of main(). ShaderCombiner_Compile appends
// the combiner statements and the closing brace.
const char *_frag_header =
    "                                \n"
    "uniform sampler2D uTex0;                                   \n"
    "uniform sampler2D uTex1;                                   \n"
    "uniform sampler2D uNoise;                                  \n"
    "uniform lowp vec4 uEnvColor;                               \n"
    "uniform lowp vec4 uPrimColor;                              \n"
    "uniform lowp vec4 uFogColor;                               \n"
    "uniform highp float uAlphaRef;                             \n"
    "uniform lowp float uPrimLODFrac;                           \n"
    "uniform lowp float uK4;                                    \n"
    "uniform lowp float uK5;                                    \n"
    "                                                           \n"
    "varying lowp float vFactor;                                \n"
    "varying lowp vec4 vShadeColor;                             \n"
    "varying mediump vec2 vTexCoord0;                           \n"
    "varying mediump vec2 vTexCoord1;                           \n"
    "                                                           \n"
    "void main()                                                \n"
    "{                                                          \n"
    "lowp vec4 lFragColor;                                      \n";
105
106
// Main part of the vertex shader. main() is deliberately left open here:
// ShaderCombiner_Init appends the optional fog and z-hack snippets and then
// the closing brace.
const char *_vert =
    "                                       \n"
    "attribute highp vec4   aPosition;                          \n"
    "attribute lowp vec4    aColor;                             \n"
    "attribute highp vec2   aTexCoord0;                         \n"
    "attribute highp vec2   aTexCoord1;                         \n"
    "                                                           \n"
    "uniform bool               uEnableFog;                         \n"
    "uniform float                  uFogMultiplier, uFogOffset;         \n"
    "uniform float                  uRenderState;                       \n"
    "                                                           \n"
    "uniform mediump vec2   uTexScale;                          \n"
    "uniform mediump vec2   uTexOffset[2];                      \n"
    "uniform mediump vec2   uCacheShiftScale[2];                \n"
    "uniform mediump vec2   uCacheScale[2];                     \n"
    "uniform mediump vec2   uCacheOffset[2];                    \n"
    "                                                           \n"
    "varying lowp float     vFactor;                            \n"
    "varying lowp vec4              vShadeColor;                        \n"
    "varying mediump vec2   vTexCoord0;                         \n"
    "varying mediump vec2   vTexCoord1;                         \n"
    "                                                           \n"
    "void main()                                                \n"
    "{                                                          \n"
    "gl_Position = aPosition;                                   \n"
    "vShadeColor = aColor;                                      \n"
    "                                                           \n"
    "if (uRenderState == 1.0)                                   \n"
    "{                                                          \n"
    "vTexCoord0 = (aTexCoord0 * (uTexScale[0] *                 \n"
    "           uCacheShiftScale[0]) + (uCacheOffset[0] -       \n"
    "           uTexOffset[0])) * uCacheScale[0];               \n"
    "vTexCoord1 = (aTexCoord0 * (uTexScale[1] *                 \n"
    "           uCacheShiftScale[1]) + (uCacheOffset[1] -       \n"
    "           uTexOffset[1])) * uCacheScale[1];               \n"
    "}                                                          \n"
    "else                                                       \n"
    "{                                                          \n"
    "vTexCoord0 = aTexCoord0;                                   \n"
    "vTexCoord1 = aTexCoord1;                                   \n"
    "}                                                          \n"
    "                                                           \n";
148
// Optional vertex shader snippet, appended by ShaderCombiner_Init when
// config.enableFog is set: computes the fog blend factor vFactor.
const char * _vertfog =
    "                                   \n"
    "if (uEnableFog)                                            \n"
    "{                                                          \n"
    "vFactor = max(-1.0, aPosition.z / aPosition.w)             \n"
    "   * uFogMultiplier + uFogOffset;                          \n"
    "vFactor = clamp(vFactor, 0.0, 1.0);                        \n"
    "}                                                          \n";
156
// Optional vertex shader snippet, appended by ShaderCombiner_Init when
// config.zHack is set: remaps gl_Position.z while uRenderState is 1.0.
const char * _vertzhack =
    "                                 \n"
    "if (uRenderState == 1.0)                                   \n"
    "{                                                          \n"
    "gl_Position.z = (gl_Position.z + gl_Position.w*9.0) * 0.1; \n"
    "}                                                          \n";
162
163
164 const char * _color_param_str(int param)
165 {
166     switch(param)
167     {
168         case COMBINED:          return "lFragColor.rgb";
169         case TEXEL0:            return "lTex0.rgb";
170         case TEXEL1:            return "lTex1.rgb";
171         case PRIMITIVE:         return "uPrimColor.rgb";
172         case SHADE:             return "vShadeColor.rgb";
173         case ENVIRONMENT:       return "uEnvColor.rgb";
174         case CENTER:            return "vec3(0.0)";
175         case SCALE:             return "vec3(0.0)";
176         case COMBINED_ALPHA:    return "vec3(lFragColor.a)";
177         case TEXEL0_ALPHA:      return "vec3(lTex0.a)";
178         case TEXEL1_ALPHA:      return "vec3(lTex1.a)";
179         case PRIMITIVE_ALPHA:   return "vec3(uPrimColor.a)";
180         case SHADE_ALPHA:       return "vec3(vShadeColor.a)";
181         case ENV_ALPHA:         return "vec3(uEnvColor.a)";
182         case LOD_FRACTION:      return "vec3(0.0)";
183         case PRIM_LOD_FRAC:     return "vec3(uPrimLODFrac)";
184         case NOISE:             return "lNoise.rgb";
185         case K4:                return "vec3(uK4)";
186         case K5:                return "vec3(uK5)";
187         case ONE:               return "vec3(1.0)";
188         case ZERO:              return "vec3(0.0)";
189         default:
190             return "vec3(0.0)";
191     }
192 }
193
194 const char * _alpha_param_str(int param)
195 {
196     switch(param)
197     {
198         case COMBINED:          return "lFragColor.a";
199         case TEXEL0:            return "lTex0.a";
200         case TEXEL1:            return "lTex1.a";
201         case PRIMITIVE:         return "uPrimColor.a";
202         case SHADE:             return "vShadeColor.a";
203         case ENVIRONMENT:       return "uEnvColor.a";
204         case CENTER:            return "0.0";
205         case SCALE:             return "0.0";
206         case COMBINED_ALPHA:    return "lFragColor.a";
207         case TEXEL0_ALPHA:      return "lTex0.a";
208         case TEXEL1_ALPHA:      return "lTex1.a";
209         case PRIMITIVE_ALPHA:   return "uPrimColor.a";
210         case SHADE_ALPHA:       return "vShadeColor.a";
211         case ENV_ALPHA:         return "uEnvColor.a";
212         case LOD_FRACTION:      return "0.0";
213         case PRIM_LOD_FRAC:     return "uPrimLODFrac";
214         case NOISE:             return "lNoise.a";
215         case K4:                return "uK4";
216         case K5:                return "uK5";
217         case ONE:               return "1.0";
218         case ZERO:              return "0.0";
219         default:
220             return "0.0";
221     }
222 }
223
224 #define MAX_CACHE       16
225 ShaderProgram*  prog_cache[MAX_CACHE];
226 u64                             mux_cache[MAX_CACHE];
227 int                             flag_cache[MAX_CACHE];
228 int                             old_cache[MAX_CACHE];
229 static int              cache_turn=0;
230
231 DecodedMux::DecodedMux(u64 mux, bool cycle2)
232 {
233     combine.mux = mux;
234     flags = 0;
235
236     //set to ZERO.
237     for(int i=0;i<4;i++)
238         for(int j=0; j< 4; j++)
239             decode[i][j] = ZERO;
240
241     //rgb cycle 0
242     decode[0][0] = saRGBExpanded[combine.saRGB0];
243     decode[0][1] = sbRGBExpanded[combine.sbRGB0];
244     decode[0][2] = mRGBExpanded[combine.mRGB0];
245     decode[0][3] = aRGBExpanded[combine.aRGB0];
246     decode[1][0] = saAExpanded[combine.saA0];
247     decode[1][1] = sbAExpanded[combine.sbA0];
248     decode[1][2] = mAExpanded[combine.mA0];
249     decode[1][3] = aAExpanded[combine.aA0];
250     if (cycle2)
251     {
252         //rgb cycle 1
253         decode[2][0] = saRGBExpanded[combine.saRGB1];
254         decode[2][1] = sbRGBExpanded[combine.sbRGB1];
255         decode[2][2] = mRGBExpanded[combine.mRGB1];
256         decode[2][3] = aRGBExpanded[combine.aRGB1];
257         decode[3][0] = saAExpanded[combine.saA1];
258         decode[3][1] = sbAExpanded[combine.sbA1];
259         decode[3][2] = mAExpanded[combine.mA1];
260         decode[3][3] = aAExpanded[combine.aA1];
261
262         //texel 0/1 are swapped in 2nd cycle.
263         swap(1, TEXEL0, TEXEL1);
264         swap(1, TEXEL0_ALPHA, TEXEL1_ALPHA);
265     }
266
267     //simplifying mux:
268     if (replace(G_CYC_1CYCLE, LOD_FRACTION, ZERO) || replace(G_CYC_2CYCLE, LOD_FRACTION, ZERO))
269         LOG(LOG_VERBOSE, "SC Replacing LOD_FRACTION with ZERO\n");
270 #if 1
271     if (replace(G_CYC_1CYCLE, K4, ZERO) || replace(G_CYC_2CYCLE, K4, ZERO))
272         LOG(LOG_VERBOSE, "SC Replacing K4 with ZERO\n");
273
274     if (replace(G_CYC_1CYCLE, K5, ZERO) || replace(G_CYC_2CYCLE, K5, ZERO))
275         LOG(LOG_VERBOSE, "SC Replacing K5 with ZERO\n");
276 #endif
277
278     if (replace(G_CYC_1CYCLE, CENTER, ZERO) || replace(G_CYC_2CYCLE, CENTER, ZERO))
279         LOG(LOG_VERBOSE, "SC Replacing CENTER with ZERO\n");
280
281     if (replace(G_CYC_1CYCLE, SCALE, ZERO) || replace(G_CYC_2CYCLE, SCALE, ZERO))
282         LOG(LOG_VERBOSE, "SC Replacing SCALE with ZERO\n");
283
284     //Combiner has initial value of zero in cycle 0
285     if (replace(G_CYC_1CYCLE, COMBINED, ZERO))
286         LOG(LOG_VERBOSE, "SC Setting CYCLE1 COMBINED to ZERO\n");
287
288     if (replace(G_CYC_1CYCLE, COMBINED_ALPHA, ZERO))
289         LOG(LOG_VERBOSE, "SC Setting CYCLE1 COMBINED_ALPHA to ZERO\n");
290
291     if (!config.enableNoise)
292     {
293         if (replace(G_CYC_1CYCLE, NOISE, ZERO))
294             LOG(LOG_VERBOSE, "SC Setting CYCLE1 NOISE to ZERO\n");
295
296         if (replace(G_CYC_2CYCLE, NOISE, ZERO))
297             LOG(LOG_VERBOSE, "SC Setting CYCLE2 NOISE to ZERO\n");
298
299     }
300
301     //mutiplying by zero: (A-B)*0 + C = C
302     for(int i=0 ; i<4; i++)
303     {
304         if (decode[i][2] == ZERO)
305         {
306             decode[i][0] = ZERO;
307             decode[i][1] = ZERO;
308         }
309     }
310
311     //(A1-B1)*C1 + D1
312     //(A2-B2)*C2 + D2
313     //1. ((A1-B1)*C1 + D1 - B2)*C2 + D2 = A1*C1*C2 - B1*C1*C2 + D1*C2 - B2*C2 + D2
314     //2. (A2 - (A1-B1)*C1 - D1)*C2 + D2 = A2*C2 - A1*C1*C2 + B1*C1*C2 - D1*C2 + D2
315     //3. (A2 - B2)*((A1-B1)*C1 + D1) + D2 = A2*A1*C1 - A2*B1*C1 + A2*D1 - B2*A1*C1 + B2*B1*C1 - B2*D1 + D2
316     //4. (A2-B2)*C2 + (A1-B1)*C1 + D1 = A2*C2 - B2*C2 + A1*C1 - B1*C1 + D1
317
318     if (cycle2)
319     {
320
321         if (!find(2, COMBINED))
322             flags |= SC_IGNORE_RGB0;
323
324         if (!(find(2, COMBINED_ALPHA) || find(3, COMBINED_ALPHA) || find(3, COMBINED)))
325             flags |= SC_IGNORE_ALPHA0;
326
327         if (decode[2][0] == ZERO && decode[2][1] == ZERO && decode[2][2] == ZERO && decode[2][3] == COMBINED)
328         {
329             flags |= SC_IGNORE_RGB1;
330         }
331
332         if (decode[3][0] == ZERO && decode[3][1] == ZERO && decode[3][2] == ZERO &&
333             (decode[3][3] == COMBINED_ALPHA || decode[3][3] == COMBINED))
334         {
335             flags |= SC_IGNORE_ALPHA1;
336         }
337
338     }
339 }
340
341 bool DecodedMux::find(int index, int src)
342 {
343     for(int j=0;j<4;j++)
344     {
345         if (decode[index][j] == src) return true;
346     }
347     return false;
348 }
349
350 bool DecodedMux::replace(int cycle, int src, int dest)
351 {
352     int r = false;
353     for(int i=0;i<2;i++)
354     {
355         int ii = (cycle == 0) ? i : (2+i);
356         for(int j=0;j<4;j++)
357         {
358             if (decode[ii][j] == src) {decode[ii][j] = dest; r=true;}
359         }
360     }
361     return r;
362 }
363
364 bool DecodedMux::swap(int cycle, int src0, int src1)
365 {
366     int r = false;
367     for(int i=0;i<2;i++)
368     {
369         int ii = (cycle == 0) ? i : (2+i);
370         for(int j=0;j<4;j++)
371         {
372             if (decode[ii][j] == src0) {decode[ii][j] = src1; r=true;}
373             else if (decode[ii][j] == src1) {decode[ii][j] = src0; r=true;}
374         }
375     }
376     return r;
377 }
378
379 void DecodedMux::hack()
380 {
381     if (config.hackZelda)
382     {
383         if(combine.mux == 0xfffd923800ffadffLL)
384         {
385             replace(G_CYC_1CYCLE, TEXEL1, TEXEL0);
386             replace(G_CYC_2CYCLE, TEXEL1, TEXEL0);
387         }
388         else if (combine.mux == 0xff5bfff800121603LL)
389         {
390             replace(G_CYC_1CYCLE, TEXEL1, ZERO);
391             replace(G_CYC_2CYCLE, TEXEL1, ZERO);
392         }
393     }
394
395 }
396
397
398 int _program_compare(ShaderProgram *prog, DecodedMux *dmux, u32 flags)
399 {
400     if (prog)
401         return ((prog->combine.mux == dmux->combine.mux) && (prog->flags == flags));
402     else
403         return 1;
404 }
405
406 void _glcompiler_error(GLint shader)
407 {
408     int len, i;
409     char* log;
410
411     glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &len);
412     log = (char*) malloc(len + 1);
413     glGetShaderInfoLog(shader, len, &i, log);
414     log[len] = 0;
415     LOG(LOG_ERROR, "COMPILE ERROR: %s \n", log);
416     free(log);
417 }
418
419 void _gllinker_error(GLint program)
420 {
421     int len, i;
422     char* log;
423
424     glGetProgramiv(program, GL_INFO_LOG_LENGTH, &len);
425     log = (char*) malloc(len + 1);
426     glGetProgramInfoLog(program, len, &i, log);
427     log[len] = 0;
428     LOG(LOG_ERROR, "LINK ERROR: %s \n", log);
429     free(log);
430 };
431
432 void _locate_attributes(ShaderProgram *p)
433 {
434     glBindAttribLocation(p->program, SC_POSITION,   "aPosition");
435     glBindAttribLocation(p->program, SC_COLOR,      "aColor");
436     glBindAttribLocation(p->program, SC_TEXCOORD0,  "aTexCoord0");
437     glBindAttribLocation(p->program, SC_TEXCOORD1,  "aTexCoord1");
438 };
439
440 #define LocateUniform(A) \
441     p->uniforms.A.loc = glGetUniformLocation(p->program, #A);
442
443 void _locate_uniforms(ShaderProgram *p)
444 {
445     LocateUniform(uTex0);
446     LocateUniform(uTex1);
447     LocateUniform(uNoise);
448     LocateUniform(uEnvColor);
449     LocateUniform(uPrimColor);
450     LocateUniform(uPrimLODFrac);
451     LocateUniform(uK4);
452     LocateUniform(uK5);
453     LocateUniform(uFogColor);
454     LocateUniform(uEnableFog);
455     LocateUniform(uRenderState);
456     LocateUniform(uFogMultiplier);
457     LocateUniform(uFogOffset);
458     LocateUniform(uAlphaRef);
459     LocateUniform(uTexScale);
460     LocateUniform(uTexOffset[0]);
461     LocateUniform(uTexOffset[1]);
462     LocateUniform(uCacheShiftScale[0]);
463     LocateUniform(uCacheShiftScale[1]);
464     LocateUniform(uCacheScale[0]);
465     LocateUniform(uCacheScale[1]);
466     LocateUniform(uCacheOffset[0]);
467     LocateUniform(uCacheOffset[1]);
468 }
469
470 void _force_uniforms()
471 {
472     SC_ForceUniform1i(uTex0, 0);
473     SC_ForceUniform1i(uTex1, 1);
474     SC_ForceUniform1i(uNoise, 2);
475     SC_ForceUniform4fv(uEnvColor, &gDP.envColor.r);
476     SC_ForceUniform4fv(uPrimColor, &gDP.primColor.r);
477     SC_ForceUniform1f(uPrimLODFrac, gDP.primColor.l);
478     SC_ForceUniform1f(uK4, gDP.convert.k4);
479     SC_ForceUniform1f(uK5, gDP.convert.k5);
480     SC_ForceUniform4fv(uFogColor, &gDP.fogColor.r);
481     SC_ForceUniform1i(uEnableFog, ((config.enableFog==1) && (gSP.geometryMode & G_FOG)));
482     SC_ForceUniform1f(uRenderState, OGL.renderState);
483     SC_ForceUniform1f(uFogMultiplier, (float) gSP.fog.multiplier / 255.0f);
484     SC_ForceUniform1f(uFogOffset, (float) gSP.fog.offset / 255.0f);
485     SC_ForceUniform1f(uAlphaRef, (gDP.otherMode.cvgXAlpha) ? 0.5 : gDP.blendColor.a);
486     SC_ForceUniform2f(uTexScale, gSP.texture.scales, gSP.texture.scalet);
487
488     if (gSP.textureTile[0]){
489         SC_ForceUniform2f(uTexOffset[0], gSP.textureTile[0]->fuls, gSP.textureTile[0]->fult);
490     } else {
491         SC_ForceUniform2f(uTexOffset[0], 0.0f, 0.0f);
492     }
493
494     if (gSP.textureTile[1])
495     {
496         SC_ForceUniform2f(uTexOffset[1], gSP.textureTile[1]->fuls, gSP.textureTile[1]->fult);
497     }
498     else
499     {
500         SC_ForceUniform2f(uTexOffset[1], 0.0f, 0.0f);
501     }
502
503     if (cache.current[0])
504     {
505         SC_ForceUniform2f(uCacheShiftScale[0], cache.current[0]->shiftScaleS, cache.current[0]->shiftScaleT);
506         SC_ForceUniform2f(uCacheScale[0], cache.current[0]->scaleS, cache.current[0]->scaleT);
507         SC_ForceUniform2f(uCacheOffset[0], cache.current[0]->offsetS, cache.current[0]->offsetT);
508     }
509     else
510     {
511         SC_ForceUniform2f(uCacheShiftScale[0], 1.0f, 1.0f);
512         SC_ForceUniform2f(uCacheScale[0], 1.0f, 1.0f);
513         SC_ForceUniform2f(uCacheOffset[0], 0.0f, 0.0f);
514     }
515
516     if (cache.current[1])
517     {
518         SC_ForceUniform2f(uCacheShiftScale[1], cache.current[1]->shiftScaleS, cache.current[1]->shiftScaleT);
519         SC_ForceUniform2f(uCacheScale[1], cache.current[1]->scaleS, cache.current[1]->scaleT);
520         SC_ForceUniform2f(uCacheOffset[1], cache.current[1]->offsetS, cache.current[1]->offsetT);
521     }
522     else
523     {
524         SC_ForceUniform2f(uCacheShiftScale[1], 1.0f, 1.0f);
525         SC_ForceUniform2f(uCacheScale[1], 1.0f, 1.0f);
526         SC_ForceUniform2f(uCacheOffset[1], 0.0f, 0.0f);
527     }
528 }
529
530 void _update_uniforms()
531 {
532     SC_SetUniform4fv(uEnvColor, &gDP.envColor.r);
533     SC_SetUniform4fv(uPrimColor, &gDP.primColor.r);
534     SC_SetUniform1f(uPrimLODFrac, gDP.primColor.l);
535     SC_SetUniform4fv(uFogColor, &gDP.fogColor.r);
536     SC_SetUniform1i(uEnableFog, (config.enableFog && (gSP.geometryMode & G_FOG)));
537     SC_SetUniform1f(uRenderState, OGL.renderState);
538     SC_SetUniform1f(uFogMultiplier, (float) gSP.fog.multiplier / 255.0f);
539     SC_SetUniform1f(uFogOffset, (float) gSP.fog.offset / 255.0f);
540     SC_SetUniform1f(uAlphaRef, (gDP.otherMode.cvgXAlpha) ? 0.5 : gDP.blendColor.a);
541     SC_SetUniform1f(uK4, gDP.convert.k4);
542     SC_SetUniform1f(uK5, gDP.convert.k5);
543
544     //for some reason i must force these...
545     SC_ForceUniform2f(uTexScale, gSP.texture.scales, gSP.texture.scalet);
546     if (scProgramCurrent->usesT0)
547     {
548         if (gSP.textureTile[0])
549         {
550             SC_ForceUniform2f(uTexOffset[0], gSP.textureTile[0]->fuls, gSP.textureTile[0]->fult);
551         }
552         if (cache.current[0])
553         {
554             SC_ForceUniform2f(uCacheShiftScale[0], cache.current[0]->shiftScaleS, cache.current[0]->shiftScaleT);
555             SC_ForceUniform2f(uCacheScale[0], cache.current[0]->scaleS, cache.current[0]->scaleT);
556             SC_ForceUniform2f(uCacheOffset[0], cache.current[0]->offsetS, cache.current[0]->offsetT);
557         }
558     }
559
560     if (scProgramCurrent->usesT1)
561     {
562         if (gSP.textureTile[1])
563         {
564             SC_ForceUniform2f(uTexOffset[1], gSP.textureTile[1]->fuls, gSP.textureTile[1]->fult);
565         }
566         if (cache.current[1])
567         {
568             SC_ForceUniform2f(uCacheShiftScale[1], cache.current[1]->shiftScaleS, cache.current[1]->shiftScaleT);
569             SC_ForceUniform2f(uCacheScale[1], cache.current[1]->scaleS, cache.current[1]->scaleT);
570             SC_ForceUniform2f(uCacheOffset[1], cache.current[1]->offsetS, cache.current[1]->offsetT);
571         }
572     }
573 };
574
575 void ShaderCombiner_Init()
576 {
577     //compile vertex shader:
578     GLint success;
579     const char *src[1];
580     char buff[4096];
581     char *str = buff;
582
583     str += sprintf(str, "%s", _vert);
584     if (config.enableFog)
585     {
586         str += sprintf(str, "%s", _vertfog);
587     }
588     if (config.zHack)
589     {
590         str += sprintf(str, "%s", _vertzhack);
591     }
592
593     str += sprintf(str, "}\n\n");
594
595 #ifdef PRINT_SHADER
596     LOG(LOG_VERBOSE, "=============================================================\n");
597     LOG(LOG_VERBOSE, "Vertex Shader:\n");
598     LOG(LOG_VERBOSE, "=============================================================\n");
599     LOG(LOG_VERBOSE, "%s", buff);
600     LOG(LOG_VERBOSE, "=============================================================\n");
601 #endif
602
603     src[0] = buff;
604     _vertex_shader = glCreateShader(GL_VERTEX_SHADER);
605     glShaderSource(_vertex_shader, 1, (const char**) src, NULL);
606     glCompileShader(_vertex_shader);
607     glGetShaderiv(_vertex_shader, GL_COMPILE_STATUS, &success);
608     if (!success)
609     {
610         _glcompiler_error(_vertex_shader);
611     }
612         
613         // prepare prog cache
614         for (int i=0; i<MAX_CACHE; i++) {
615                 prog_cache[i]=NULL;
616                 flag_cache[i]=0;
617                 mux_cache[i]=0;
618                 old_cache[i]=0;
619         }
620         cache_turn=0;
621 };
622
623 void ShaderCombiner_DeletePrograms(ShaderProgram *prog)
624 {
625     if (prog)
626     {
627         ShaderCombiner_DeletePrograms(prog->left);
628         ShaderCombiner_DeletePrograms(prog->right);
629         glDeleteProgram(prog->program);
630         //glDeleteShader(prog->fragment);
631         free(prog);
632         scProgramCount--;
633     }
634 }
635
636 void ShaderCombiner_Destroy()
637 {
638     ShaderCombiner_DeletePrograms(scProgramRoot);
639     glDeleteShader(_vertex_shader);
640     scProgramCount = scProgramChanged = 0;
641     scProgramRoot = scProgramCurrent = NULL;
642 }
643
644 void ShaderCombiner_Set(u64 mux, int flags)
645 {
646     //banjo tooie hack
647     if ((gDP.otherMode.cycleType == G_CYC_1CYCLE) && (mux == 0x00ffe7ffffcf9fcfLL))
648     {
649         mux = EncodeCombineMode( 0, 0, 0, 0, TEXEL0, 0, PRIMITIVE, 0,
650                                  0, 0, 0, 0, TEXEL0, 0, PRIMITIVE, 0 );
651     }
652
653     //determine flags
654     if (flags == -1)
655     {
656         flags = 0;
657         if ((config.enableFog) && (gSP.geometryMode & G_FOG))
658             flags |= SC_FOGENABLED;
659
660         if (config.enableAlphaTest)
661         {
662             if ((gDP.otherMode.alphaCompare == G_AC_THRESHOLD) && !(gDP.otherMode.alphaCvgSel)){
663                 flags |= SC_ALPHAENABLED;
664                 if (gDP.blendColor.a > 0.0f) flags |= SC_ALPHAGREATER;
665             } else if (gDP.otherMode.cvgXAlpha){
666                 flags |= SC_ALPHAENABLED;
667                 flags |= SC_ALPHAGREATER;
668             }
669         }
670
671         if (gDP.otherMode.cycleType == G_CYC_2CYCLE)
672             flags |= SC_2CYCLE;
673     }
674
675
676     DecodedMux dmux(mux, flags&SC_2CYCLE);
677     dmux.hack();
678
679     //if already bound:
680     if (scProgramCurrent)
681     {
682         if (_program_compare(scProgramCurrent, &dmux, flags))
683         {
684             scProgramChanged = 0;
685             return;
686         }
687     }
688
689     //traverse binary tree for cached programs
690     scProgramChanged = 1;
691     ShaderProgram *root = scProgramRoot;
692     ShaderProgram *prog = root;
693     while(!_program_compare(prog, &dmux, flags))
694     {
695         root = prog;
696         if (prog->combine.mux < dmux.combine.mux)
697             prog = prog->right;
698         else
699             prog = prog->left;
700     }
701
702     //build new program
703     if (!prog)
704     {
705         scProgramCount++;
706         prog = ShaderCombiner_Compile(&dmux, flags);
707         if (!root)
708             scProgramRoot = prog;
709         else if (root->combine.mux < dmux.combine.mux)
710             root->right = prog;
711         else
712             root->left = prog;
713
714     }
715
716     prog->lastUsed = OGL.frame_dl;
717     scProgramCurrent = prog;
718     glUseProgram(prog->program);
719     _force_uniforms();
720 }
721
722 ShaderProgram *ShaderCombiner_Compile(DecodedMux *dmux, int flags)
723 {
724     GLint success;
725     char frag[4096];
726     char *buffer = frag;
727     ShaderProgram *prog = (ShaderProgram*) malloc(sizeof(ShaderProgram));
728
729     prog->left = prog->right = NULL;
730     prog->usesT0 = prog->usesT1 = prog->usesCol = prog->usesNoise = 0;
731     prog->combine = dmux->combine;
732     prog->flags = flags;
733     prog->vertex = _vertex_shader;
734
735     for(int i=0; i < ((flags & SC_2CYCLE) ? 4 : 2); i++)
736     {
737         //make sure were not ignoring cycle:
738         if ((dmux->flags&(1<<i)) == 0)
739         {
740             for(int j=0;j<4;j++)
741             {
742                 prog->usesT0 |= (dmux->decode[i][j] == TEXEL0 || dmux->decode[i][j] == TEXEL0_ALPHA);
743                 prog->usesT1 |= (dmux->decode[i][j] == TEXEL1 || dmux->decode[i][j] == TEXEL1_ALPHA);
744                 prog->usesCol |= (dmux->decode[i][j] == SHADE || dmux->decode[i][j] == SHADE_ALPHA);
745                 prog->usesNoise |= (dmux->decode[i][j] == NOISE);
746             }
747         }
748     }
749
750     buffer += sprintf(buffer, "%s", _frag_header);
751     if (prog->usesT0)
752         buffer += sprintf(buffer, "lowp vec4 lTex0 = texture2D(uTex0, vTexCoord0); \n");
753     if (prog->usesT1)
754         buffer += sprintf(buffer, "lowp vec4 lTex1 = texture2D(uTex1, vTexCoord1); \n");
755     if (prog->usesNoise)
756         buffer += sprintf(buffer, "lowp vec4 lNoise = texture2D(uNoise, (1.0 / 1024.0) * gl_FragCoord.st); \n");
757
758     for(int i = 0; i < ((flags & SC_2CYCLE) ? 2 : 1); i++)
759     {
760         if ((dmux->flags&(1<<(i*2))) == 0)
761         {
762             buffer += sprintf(buffer, "lFragColor.rgb = (%s - %s) * %s + %s; \n",
763                 _color_param_str(dmux->decode[i*2][0]),
764                 _color_param_str(dmux->decode[i*2][1]),
765                 _color_param_str(dmux->decode[i*2][2]),
766                 _color_param_str(dmux->decode[i*2][3])
767                 );
768         }
769
770         if ((dmux->flags&(1<<(i*2+1))) == 0)
771         {
772             buffer += sprintf(buffer, "lFragColor.a = (%s - %s) * %s + %s; \n",
773                 _alpha_param_str(dmux->decode[i*2+1][0]),
774                 _alpha_param_str(dmux->decode[i*2+1][1]),
775                 _alpha_param_str(dmux->decode[i*2+1][2]),
776                 _alpha_param_str(dmux->decode[i*2+1][3])
777                 );
778         }
779         buffer += sprintf(buffer, "gl_FragColor = lFragColor; \n");
780     };
781
782     //fog
783     if (flags&SC_FOGENABLED)
784     {
785         buffer += sprintf(buffer, "gl_FragColor = mix(gl_FragColor, uFogColor, vFactor); \n");
786     }
787
788     //alpha function
789     if (flags&SC_ALPHAENABLED)
790     {
791         if (flags&SC_ALPHAGREATER)
792             buffer += sprintf(buffer, "if (gl_FragColor.a < uAlphaRef) %s;\n", config.hackAlpha ? "gl_FragColor.a = 0" : "discard");
793         else
794             buffer += sprintf(buffer, "if (gl_FragColor.a <= uAlphaRef) %s;\n", config.hackAlpha ? "gl_FragColor.a = 0" : "discard");
795     }
796     buffer += sprintf(buffer, "} \n\n");
797     *buffer = 0;
798
799 #ifdef PRINT_SHADER
800     LOG(LOG_VERBOSE, "=============================================================\n");
801     LOG(LOG_VERBOSE, "Combine=0x%llx flags=0x%x dmux flags=0x%x\n", prog->combine.mux, flags, dmux->flags);
802     LOG(LOG_VERBOSE, "Num=%i \t usesT0=%i usesT1=%i usesCol=%i usesNoise=%i\n", scProgramCount, prog->usesT0, prog->usesT1, prog->usesCol, prog->usesNoise);
803     LOG(LOG_VERBOSE, "=============================================================\n");
804     LOG(LOG_VERBOSE, "%s", frag);
805     LOG(LOG_VERBOSE, "=============================================================\n");
806 #endif
807
808     prog->program = glCreateProgram();
809
810     //Compile:
811     char *src[1];
812     src[0] = frag;
813     GLint len[1];
814     len[0] = min(4096, strlen(frag));
815     prog->fragment = glCreateShader(GL_FRAGMENT_SHADER);
816
817     glShaderSource(prog->fragment, 1, (const char**) src, len);
818     glCompileShader(prog->fragment);
819
820
821     glGetShaderiv(prog->fragment, GL_COMPILE_STATUS, &success);
822     if (!success)
823     {
824         _glcompiler_error(prog->fragment);
825     }
826
827     //link
828     _locate_attributes(prog);
829     glAttachShader(prog->program, prog->fragment);
830     glAttachShader(prog->program, prog->vertex);
831     glLinkProgram(prog->program);
832     glGetProgramiv(prog->program, GL_LINK_STATUS, &success);
833     if (!success)
834     {
835         _gllinker_error(prog->program);
836     }
837
838     //remove fragment shader:
839     glDeleteShader(prog->fragment);
840
841     _locate_uniforms(prog);
842     return prog;
843 }
844