GLES2N64 (from mupen64plus-ae) plugin. Compile and run on the OpenPandora
author ptitSeb <sebastien.chev@gmail.com>
Tue, 24 Sep 2013 20:01:24 +0000 (22:01 +0200)
committer ptitSeb <sebastien.chev@gmail.com>
Tue, 24 Sep 2013 20:01:24 +0000 (22:01 +0200)
74 files changed:
source/gles2n64/Android.mk [new file with mode: 0644]
source/gles2n64/Makefile [new file with mode: 0755]
source/gles2n64/src/2xSAI.cpp [new file with mode: 0644]
source/gles2n64/src/2xSAI.h [new file with mode: 0644]
source/gles2n64/src/3DMath.cpp [new file with mode: 0644]
source/gles2n64/src/3DMath.h [new file with mode: 0644]
source/gles2n64/src/3DMathNeon.cpp [new file with mode: 0644]
source/gles2n64/src/COPYING [new file with mode: 0644]
source/gles2n64/src/CRC.cpp [new file with mode: 0644]
source/gles2n64/src/CRC.h [new file with mode: 0644]
source/gles2n64/src/Common.h [new file with mode: 0755]
source/gles2n64/src/Config.cpp [new file with mode: 0644]
source/gles2n64/src/Config.h [new file with mode: 0644]
source/gles2n64/src/Debug.h [new file with mode: 0644]
source/gles2n64/src/DepthBuffer.cpp [new file with mode: 0644]
source/gles2n64/src/DepthBuffer.h [new file with mode: 0644]
source/gles2n64/src/F3D.cpp [new file with mode: 0644]
source/gles2n64/src/F3D.h [new file with mode: 0644]
source/gles2n64/src/F3DCBFD.cpp [new file with mode: 0644]
source/gles2n64/src/F3DCBFD.h [new file with mode: 0644]
source/gles2n64/src/F3DDKR.cpp [new file with mode: 0644]
source/gles2n64/src/F3DDKR.h [new file with mode: 0644]
source/gles2n64/src/F3DEX.cpp [new file with mode: 0644]
source/gles2n64/src/F3DEX.h [new file with mode: 0644]
source/gles2n64/src/F3DEX2.cpp [new file with mode: 0644]
source/gles2n64/src/F3DEX2.h [new file with mode: 0644]
source/gles2n64/src/F3DPD.cpp [new file with mode: 0644]
source/gles2n64/src/F3DPD.h [new file with mode: 0644]
source/gles2n64/src/F3DWRUS.cpp [new file with mode: 0644]
source/gles2n64/src/F3DWRUS.h [new file with mode: 0644]
source/gles2n64/src/FrameSkipper.cpp [new file with mode: 0644]
source/gles2n64/src/FrameSkipper.h [new file with mode: 0644]
source/gles2n64/src/GBI.cpp [new file with mode: 0644]
source/gles2n64/src/GBI.h [new file with mode: 0644]
source/gles2n64/src/Hash.h [new file with mode: 0644]
source/gles2n64/src/L3D.cpp [new file with mode: 0644]
source/gles2n64/src/L3D.h [new file with mode: 0644]
source/gles2n64/src/L3DEX.cpp [new file with mode: 0644]
source/gles2n64/src/L3DEX.h [new file with mode: 0644]
source/gles2n64/src/L3DEX2.cpp [new file with mode: 0644]
source/gles2n64/src/L3DEX2.h [new file with mode: 0644]
source/gles2n64/src/N64.cpp [new file with mode: 0644]
source/gles2n64/src/N64.h [new file with mode: 0644]
source/gles2n64/src/OpenGL.cpp [new file with mode: 0755]
source/gles2n64/src/OpenGL.h [new file with mode: 0644]
source/gles2n64/src/RDP.cpp [new file with mode: 0644]
source/gles2n64/src/RDP.h [new file with mode: 0644]
source/gles2n64/src/RSP.cpp [new file with mode: 0644]
source/gles2n64/src/RSP.h [new file with mode: 0644]
source/gles2n64/src/S2DEX.cpp [new file with mode: 0644]
source/gles2n64/src/S2DEX.h [new file with mode: 0644]
source/gles2n64/src/S2DEX2.cpp [new file with mode: 0644]
source/gles2n64/src/S2DEX2.h [new file with mode: 0644]
source/gles2n64/src/ShaderCombiner.cpp [new file with mode: 0755]
source/gles2n64/src/ShaderCombiner.h [new file with mode: 0644]
source/gles2n64/src/Textures.cpp [new file with mode: 0644]
source/gles2n64/src/Textures.h [new file with mode: 0644]
source/gles2n64/src/Types.h [new file with mode: 0644]
source/gles2n64/src/VI.cpp [new file with mode: 0644]
source/gles2n64/src/VI.h [new file with mode: 0644]
source/gles2n64/src/convert.h [new file with mode: 0644]
source/gles2n64/src/eglport.cpp [new file with mode: 0755]
source/gles2n64/src/eglport.h [new file with mode: 0755]
source/gles2n64/src/gDP.cpp [new file with mode: 0644]
source/gles2n64/src/gDP.h [new file with mode: 0644]
source/gles2n64/src/gSP.cpp [new file with mode: 0644]
source/gles2n64/src/gSP.h [new file with mode: 0644]
source/gles2n64/src/gSPNeon.cpp [new file with mode: 0644]
source/gles2n64/src/gles2N64.cpp [new file with mode: 0755]
source/gles2n64/src/gles2N64.h [new file with mode: 0644]
source/gles2n64/src/sdl2_compat.h [new file with mode: 0644]
source/gles2n64/src/ticks.c [new file with mode: 0644]
source/gles2n64/src/ticks.h [new file with mode: 0644]
source/gles2n64/src/video_api_export.ver [new file with mode: 0644]

diff --git a/source/gles2n64/Android.mk b/source/gles2n64/Android.mk
new file mode 100644 (file)
index 0000000..f2ae620
--- /dev/null
@@ -0,0 +1,87 @@
+LOCAL_PATH := $(call my-dir)
+
+include $(CLEAR_VARS)
+SRCDIR := src
+
+LOCAL_MODULE := gles2n64
+LOCAL_SHARED_LIBRARIES := ae-imports SDL2 core
+LOCAL_STATIC_LIBRARIES := cpufeatures
+LOCAL_ARM_MODE := arm
+
+LOCAL_C_INCLUDES :=         \
+    $(M64P_API_INCLUDES)    \
+    $(SDL_INCLUDES)         \
+    $(AE_BRIDGE_INCLUDES)   \
+
+LOCAL_SRC_FILES :=                  \
+    $(SRCDIR)/2xSAI.cpp             \
+    $(SRCDIR)/3DMath.cpp            \
+    $(SRCDIR)/Config.cpp            \
+    $(SRCDIR)/CRC.cpp               \
+    $(SRCDIR)/DepthBuffer.cpp       \
+    $(SRCDIR)/F3D.cpp               \
+    $(SRCDIR)/F3DCBFD.cpp           \
+    $(SRCDIR)/F3DDKR.cpp            \
+    $(SRCDIR)/F3DEX.cpp             \
+    $(SRCDIR)/F3DEX2.cpp            \
+    $(SRCDIR)/F3DPD.cpp             \
+    $(SRCDIR)/F3DWRUS.cpp           \
+    $(SRCDIR)/FrameSkipper.cpp      \
+    $(SRCDIR)/GBI.cpp               \
+    $(SRCDIR)/gDP.cpp               \
+    $(SRCDIR)/gles2N64.cpp          \
+    $(SRCDIR)/gSP.cpp               \
+    $(SRCDIR)/L3D.cpp               \
+    $(SRCDIR)/L3DEX.cpp             \
+    $(SRCDIR)/L3DEX2.cpp            \
+    $(SRCDIR)/N64.cpp               \
+    $(SRCDIR)/OpenGL.cpp            \
+    $(SRCDIR)/RDP.cpp               \
+    $(SRCDIR)/RSP.cpp               \
+    $(SRCDIR)/S2DEX.cpp             \
+    $(SRCDIR)/S2DEX2.cpp            \
+    $(SRCDIR)/ShaderCombiner.cpp    \
+    $(SRCDIR)/Textures.cpp          \
+    $(SRCDIR)/ticks.c               \
+    $(SRCDIR)/VI.cpp                \
+
+LOCAL_CFLAGS :=         \
+    $(COMMON_CFLAGS)    \
+    -D__CRC_OPT         \
+    -D__HASHMAP_OPT     \
+    -D__TRIBUFFER_OPT   \
+    -D__VEC4_OPT        \
+    -DANDROID           \
+    -DUSE_SDL           \
+    -fsigned-char       \
+    #-DSDL_NO_COMPAT     \
+    
+LOCAL_CPPFLAGS := $(COMMON_CPPFLAGS)
+    
+LOCAL_LDFLAGS := -Wl,-version-script,$(LOCAL_PATH)/$(SRCDIR)/video_api_export.ver
+
+LOCAL_LDLIBS :=         \
+    -lGLESv2            \
+    -llog               \
+
+ifeq ($(TARGET_ARCH_ABI), armeabi-v7a)
+    # Use for ARM7a:
+    LOCAL_SRC_FILES += $(SRCDIR)/gSPNeon.cpp.neon
+    LOCAL_SRC_FILES += $(SRCDIR)/3DMathNeon.cpp.neon 
+    LOCAL_CFLAGS += -DARM_ASM
+    LOCAL_CFLAGS += -D__NEON_OPT
+
+else ifeq ($(TARGET_ARCH_ABI), armeabi)
+    # Use for pre-ARM7a:
+
+else ifeq ($(TARGET_ARCH_ABI), x86)
+    # TODO: set the proper flags here
+
+else
+    # Any other architectures that Android could be running on?
+
+endif
+
+include $(BUILD_SHARED_LIBRARY)
+
+$(call import-module, android/cpufeatures)
diff --git a/source/gles2n64/Makefile b/source/gles2n64/Makefile
new file mode 100755 (executable)
index 0000000..3048cfa
--- /dev/null
@@ -0,0 +1,176 @@
+PIC ?= 1
+NO_ASM := 1
+CFLAGS += -mcpu=cortex-a8 -mfpu=neon -mfloat-abi=softfp -mtune=cortex-a8 -fsigned-char -DNO_ASM -DPAULSCODE -Ofast -ffast-math -fno-strict-aliasing
+CFLAGS += -I ../mupen64plus-core/src/api/
+CFLAGS += `sdl-config --cflags`
+
+SHARED = -shared
+SRCDIR := src
+
+MODULE := gles2n64
+SHARED_LIBRARIES := SDL core
+ARM_MODE := arm
+
+C_INCLUDES :=         \
+    $(M64P_API_INCLUDES)    \
+    $(SDL_INCLUDES)         \
+
+SOURCE :=                  \
+    $(SRCDIR)/2xSAI.cpp             \
+    $(SRCDIR)/3DMath.cpp            \
+    $(SRCDIR)/Config.cpp            \
+    $(SRCDIR)/CRC.cpp               \
+    $(SRCDIR)/DepthBuffer.cpp       \
+    $(SRCDIR)/F3D.cpp               \
+    $(SRCDIR)/F3DCBFD.cpp           \
+    $(SRCDIR)/F3DDKR.cpp            \
+    $(SRCDIR)/F3DEX.cpp             \
+    $(SRCDIR)/F3DEX2.cpp            \
+    $(SRCDIR)/F3DPD.cpp             \
+    $(SRCDIR)/F3DWRUS.cpp           \
+    $(SRCDIR)/FrameSkipper.cpp      \
+    $(SRCDIR)/GBI.cpp               \
+    $(SRCDIR)/gDP.cpp               \
+    $(SRCDIR)/gles2N64.cpp          \
+    $(SRCDIR)/gSP.cpp               \
+    $(SRCDIR)/L3D.cpp               \
+    $(SRCDIR)/L3DEX.cpp             \
+    $(SRCDIR)/L3DEX2.cpp            \
+    $(SRCDIR)/N64.cpp               \
+    $(SRCDIR)/OpenGL.cpp            \
+    $(SRCDIR)/RDP.cpp               \
+    $(SRCDIR)/RSP.cpp               \
+    $(SRCDIR)/S2DEX.cpp             \
+    $(SRCDIR)/S2DEX2.cpp            \
+    $(SRCDIR)/ShaderCombiner.cpp    \
+    $(SRCDIR)/Textures.cpp          \
+    $(SRCDIR)/ticks.c               \
+    $(SRCDIR)/VI.cpp                \
+       $(SRCDIR)/eglport.cpp                   \
+
+CFLAGS +=         \
+    $(COMMON_CFLAGS)    \
+    -D__CRC_OPT         \
+    -D__HASHMAP_OPT     \
+    -D__TRIBUFFER_OPT   \
+    -D__VEC4_OPT        \
+    -DUSE_SDL           \
+    -fsigned-char       \
+    #-DSDL_NO_COMPAT     \
+    
+ifeq ($(PIC), 1)
+  CFLAGS += -fPIC
+else
+  CFLAGS += -fno-PIC
+endif
+
+CPPFLAGS := $(CPPFLAGS)
+    
+LDLIBS :=         \
+    -lGLESv2           \
+       -lEGL                   \
+       -lrt                    \
+
+# Use for ARM7a:
+SOURCE += $(SRCDIR)/gSPNeon.cpp
+SOURCE += $(SRCDIR)/3DMathNeon.cpp
+CFLAGS += -DARM_ASM
+CFLAGS += -D__NEON_OPT
+
+LDFLAGS += $(SHARED)
+
+include $(BUILD_SHARED_LIBRARY)
+
+# set base program pointers and flags
+CC        = $(CROSS_COMPILE)gcc
+CXX       = $(CROSS_COMPILE)g++
+RM       ?= rm -f
+INSTALL  ?= install
+MKDIR ?= mkdir -p
+COMPILE.c = $(Q_CC)$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -gdwarf-2 -c
+COMPILE.cc = $(Q_CXX)$(CXX) $(CXXFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -gdwarf-2 -c
+LINK.o = $(Q_LD)$(CXX) $(CXXFLAGS) $(LDFLAGS) $(TARGET_ARCH) -gdwarf-2
+
+# set installation options
+ifeq ($(PREFIX),)
+  PREFIX := /usr/local
+endif
+ifeq ($(SHAREDIR),)
+  SHAREDIR := $(PREFIX)/share/mupen64plus
+endif
+ifeq ($(LIBDIR),)
+  LIBDIR := $(PREFIX)/lib
+endif
+ifeq ($(PLUGINDIR),)
+  PLUGINDIR := $(LIBDIR)/mupen64plus
+endif
+
+OBJDIR = _obj$(POSTFIX)
+
+# generate a list of object files build, make a temporary directory for them
+OBJECTS := $(patsubst $(SRCDIR)/%.c, $(OBJDIR)/%.o, $(filter %.c, $(SOURCE)))
+OBJECTS += $(patsubst $(SRCDIR)/%.cpp, $(OBJDIR)/%.o, $(filter %.cpp, $(SOURCE)))
+OBJDIRS = $(dir $(OBJECTS))
+$(shell $(MKDIR) $(OBJDIRS))
+
+# build targets
+TARGET = mupen64plus-video-gles2n64.so
+
+targets: # first rule in the file, so a bare `make` prints this usage text
+       @echo "Mupen64plus-video-gles2n64 N64 Graphics plugin makefile. "
+       @echo "  Targets:"
+       @echo "    all           == Build Mupen64plus-video-gles2n64 plugin"
+       @echo "    clean         == remove object files"
+       @echo "    rebuild       == clean and re-build all"
+       @echo "    install       == Install Mupen64Plus-video-gles2n64 plugin"
+       @echo "    uninstall     == Uninstall Mupen64Plus-video-gles2n64 plugin"
+       @echo "  Options:"
+       @echo "    BITS=32       == build 32-bit binaries on 64-bit machine"
+       @echo "    NO_ASM=1      == build without inline assembly code (x86 MMX/SSE)"
+       @echo "    APIDIR=path   == path to find Mupen64Plus Core headers"
+       @echo "    OPTFLAGS=flag == compiler optimization (default: -O3)"
+       @echo "    WARNFLAGS=flag == compiler warning levels (default: -Wall)"
+       @echo "    PIC=(1|0)     == Force enable/disable of position independent code"
+       @echo "    POSTFIX=name  == String added to the name of the build (default: '')"
+       @echo "  Install Options:"
+       @echo "    PREFIX=path   == install/uninstall prefix (default: /usr/local)"
+       @echo "    SHAREDIR=path == path to install shared data files (default: PREFIX/share/mupen64plus)"
+       @echo "    LIBDIR=path   == library prefix (default: PREFIX/lib)"
+       @echo "    PLUGINDIR=path == path to install plugin libraries (default: LIBDIR/mupen64plus)"
+       @echo "    DESTDIR=path  == path to prepend to all installation paths (only for packagers)"
+       @echo "  Debugging Options:"
+       @echo "    DEBUG=1       == add debugging symbols"
+       @echo "    V=1           == show verbose compiler output"
+
+all: $(TARGET)
+
+install: $(TARGET)
+       $(INSTALL) -d "$(DESTDIR)$(PLUGINDIR)"
+       $(INSTALL) -m 0644 $(INSTALL_STRIP_FLAG) $(TARGET) "$(DESTDIR)$(PLUGINDIR)"
+       $(INSTALL) -d "$(DESTDIR)$(SHAREDIR)"
+
+uninstall:
+       $(RM) "$(DESTDIR)$(PLUGINDIR)/$(TARGET)"
+
+clean:
+       $(RM) -r $(OBJDIR) $(TARGET)
+
+rebuild: clean all
+
+# build dependency files
+CFLAGS += -MD
+-include $(OBJECTS:.o=.d)
+
+CXXFLAGS += $(CFLAGS)
+
+# standard build rules
+$(OBJDIR)/%.o: $(SRCDIR)/%.c
+       $(COMPILE.c) -o $@ $<
+
+$(OBJDIR)/%.o: $(SRCDIR)/%.cpp
+       $(COMPILE.cc) -o $@ $<
+
+$(TARGET): $(OBJECTS)
+       $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -o $@
+
+.PHONY: all clean rebuild install uninstall targets
diff --git a/source/gles2n64/src/2xSAI.cpp b/source/gles2n64/src/2xSAI.cpp
new file mode 100644 (file)
index 0000000..516f773
--- /dev/null
@@ -0,0 +1,647 @@
+#include "Types.h"
+//#include "GBI.h"
+
+static inline s16 GetResult1( u32 A, u32 B, u32 C, u32 D, u32 E )
+{
+    s16 x = 0;
+    s16 y = 0;
+    s16 r = 0;
+
+    if (A == C) x += 1; else if (B == C) y += 1;
+    if (A == D) x += 1; else if (B == D) y += 1;
+    if (x <= 1) r += 1;
+    if (y <= 1) r -= 1;
+
+    return r;
+}
+
+static inline s16 GetResult2( u32 A, u32 B, u32 C, u32 D, u32 E)
+{
+    s16 x = 0;
+    s16 y = 0;
+    s16 r = 0;
+
+    if (A == C) x += 1; else if (B == C) y += 1;
+    if (A == D) x += 1; else if (B == D) y += 1;
+    if (x <= 1) r -= 1;
+    if (y <= 1) r += 1;
+
+    return r;
+}
+
+
+static inline s16 GetResult( u32 A, u32 B, u32 C, u32 D )
+{
+    s16 x = 0;
+    s16 y = 0;
+    s16 r = 0;
+
+    if (A == C) x += 1; else if (B == C) y += 1;
+    if (A == D) x += 1; else if (B == D) y += 1;
+    if (x <= 1) r += 1;
+    if (y <= 1) r -= 1;
+
+    return r;
+}
+
+static inline u16 INTERPOLATE4444( u16 A, u16 B)    // per-channel floor((A + B) / 2) of two pixels made of four 4-bit channels
+{
+    if (A != B)
+        return  ((A & 0xEEEE) >> 1) +               // A/2 in every nibble (low bit cleared first so nothing leaks into the neighbour)
+                ((B & 0xEEEE) >> 1) +               // B/2 in every nibble
+                (A & B & 0x1111);                   // +1 where both low bits were set; added (not OR-ed) so the carry into bit 1 is kept
+    else
+        return A;                                   // identical pixels need no blending
+}
+
+static inline u16 INTERPOLATE5551( u16 A, u16 B)    // per-channel floor((A + B) / 2) for three 5-bit channels (bits 15-11, 10-6, 5-1); bit 0 becomes A & B
+{
+    if (A != B)
+        return  ((A & 0xF7BC) >> 1) +               // A/2 per 5-bit channel (channel low bits and bit 0 cleared before the shift)
+                ((B & 0xF7BC) >> 1) +               // B/2 per 5-bit channel
+                (A & B & 0x0843);                   // +1 where both channel low bits were set, plus bit 0; added (not OR-ed) so carries are kept
+    else
+        return A;                                   // identical pixels need no blending
+}
+
+static inline u32 INTERPOLATE8888( u32 A, u32 B)
+{
+    if (A != B)
+        return  ((A & 0xFEFEFEFE) >> 1) +
+                (((B & 0xFEFEFEFE) >> 1) |
+                (A & B & 0x01010101));
+    else
+        return A;
+}
+
+static inline u16 Q_INTERPOLATE4444( u16 A, u16 B, u16 C, u16 D)
+{
+    u16 x = ((A & 0xCCCC) >> 2) +
+                ((B & 0xCCCC) >> 2) +
+                ((C & 0xCCCC) >> 2) +
+                ((D & 0xCCCC) >> 2);
+    u16 y = (((A & 0x3333) +
+                (B & 0x3333) +
+                (C & 0x3333) +
+                (D & 0x3333)) >> 2) & 0x3333;
+    return x | y;
+}
+
+static inline u16 Q_INTERPOLATE5551( u16 A, u16 B, u16 C, u16 D)
+{
+    u16 x = ((A & 0xE738) >> 2) +
+                ((B & 0xE738) >> 2) +
+                ((C & 0xE738) >> 2) +
+                ((D & 0xE738) >> 2);
+    u16 y = (((A & 0x18C6) +
+                (B & 0x18C6) +
+                (C & 0x18C6) +
+                (D & 0x18C6)) >> 2) & 0x18C6;
+    u16 z = ((A & 0x0001) +
+                (B & 0x0001) +
+                (C & 0x0001) +
+                (D & 0x0001)) > 2 ? 1 : 0;
+    return x | y | z;
+}
+
+static inline u32 Q_INTERPOLATE8888( u32 A, u32 B, u32 C, u32 D)
+{
+    u32 x = ((A & 0xFCFCFCFC) >> 2) +
+                ((B & 0xFCFCFCFC) >> 2) +
+                ((C & 0xFCFCFCFC) >> 2) +
+                ((D & 0xFCFCFCFC) >> 2);
+    u32 y = (((A & 0x03030303) +
+                (B & 0x03030303) +
+                (C & 0x03030303) +
+                (D & 0x03030303)) >> 2) & 0x03030303;
+    return x | y;
+}
+
+void _2xSaI4444( u16 *srcPtr, u16 *destPtr, u16 width, u16 height, s32 clampS, s32 clampT )    // Writes a (2*width) x (2*height) image to destPtr from the width x height 16-bit image at srcPtr; nonzero clampS/clampT repeat the edge column/row, zero wraps to the opposite edge
+{
+    u16 destWidth = width << 1;    // output row length in pixels
+    //u16 destHeight = height << 1;
+
+    u32 colorA, colorB, colorC, colorD,
+          colorE, colorF, colorG, colorH,
+          colorI, colorJ, colorK, colorL,
+          colorM, colorN, colorO, colorP;
+    u32 product, product1, product2;    // output pixels to the right of, below, and diagonally below-right of A
+
+    s16 row0, row1, row2, row3;    // element offsets from srcPtr to rows y-1, y, y+1, y+2; NOTE(review): s16, so (height - 1) * width must fit in 16 bits - confirm caller limits
+    s16 col0, col1, col2, col3;    // element offsets from srcPtr to columns x-1, x, x+1, x+2
+
+    for (u16 y = 0; y < height; y++)
+    {
+        if (y > 0)
+            row0 = -width;
+        else
+            row0 = clampT ? 0 : (height - 1) * width;    // first row: reuse it (clamp) or take the last row (wrap)
+
+        row1 = 0;
+
+        if (y < height - 1)
+        {
+            row2 = width;
+
+            if (y < height - 2)
+                row3 = width << 1;
+            else
+                row3 = clampT ? width : -y * width;    // second-to-last row: y+2 becomes the last row (clamp) or row 0 (wrap)
+        }
+        else
+        {
+            row2 = clampT ? 0 : -y * width;    // last row: y+1 becomes this row (clamp) or row 0 (wrap)
+            row3 = clampT ? 0 : (1 - y) * width;    // ... and y+2 becomes this row (clamp) or row 1 (wrap); NOTE(review): with height == 1 and no clamp this points one row past the image - confirm callers
+        }
+
+        for (u16 x = 0; x < width; x++)
+        {
+            if (x > 0)
+                col0 = -1;
+            else
+                col0 = clampS ? 0 : width - 1;    // first column: reuse it (clamp) or take the last column (wrap)
+
+            col1 = 0;
+
+            if (x < width - 1)
+            {
+                col2 = 1;
+
+                if (x < width - 2)
+                    col3 = 2;
+                else
+                    col3 = clampS ? 1 : -x;    // second-to-last column: x+2 becomes the last column (clamp) or column 0 (wrap)
+            }
+            else
+            {
+                col2 = clampS ? 0 : -x;    // last column: x+1 becomes this column (clamp) or column 0 (wrap)
+                col3 = clampS ? 0 : 1 - x;    // ... and x+2 becomes this column (clamp) or column 1 (wrap); NOTE(review): with width == 1 and no clamp this is one element past the row - confirm callers
+            }
+
+//---------------------------------------
+// Map of the pixels:                    I|E F|J
+//                                       G|A B|K
+//                                       H|C D|L
+//                                       M|N O|P
+            colorI = *(srcPtr + col0 + row0);
+            colorE = *(srcPtr + col1 + row0);
+            colorF = *(srcPtr + col2 + row0);
+            colorJ = *(srcPtr + col3 + row0);
+
+            colorG = *(srcPtr + col0 + row1);
+            colorA = *(srcPtr + col1 + row1);    // the source pixel being expanded
+            colorB = *(srcPtr + col2 + row1);
+            colorK = *(srcPtr + col3 + row1);
+
+            colorH = *(srcPtr + col0 + row2);
+            colorC = *(srcPtr + col1 + row2);
+            colorD = *(srcPtr + col2 + row2);
+            colorL = *(srcPtr + col3 + row2);
+
+            colorM = *(srcPtr + col0 + row3);
+            colorN = *(srcPtr + col1 + row3);
+            colorO = *(srcPtr + col2 + row3);
+            colorP = *(srcPtr + col3 + row3);
+
+            if ((colorA == colorD) && (colorB != colorC))    // only the A-D diagonal matches
+            {
+                if ( ((colorA == colorE) && (colorB == colorL)) ||
+                    ((colorA == colorC) && (colorA == colorF) && (colorB != colorE) && (colorB == colorJ)) )
+                    product = colorA;
+                else
+                    product = INTERPOLATE4444(colorA, colorB);
+
+                if (((colorA == colorG) && (colorC == colorO)) ||
+                    ((colorA == colorB) && (colorA == colorH) && (colorG != colorC) && (colorC == colorM)) )
+                    product1 = colorA;
+                else
+                    product1 = INTERPOLATE4444(colorA, colorC);
+
+                product2 = colorA;
+            }
+            else if ((colorB == colorC) && (colorA != colorD))    // only the B-C diagonal matches
+            {
+                if (((colorB == colorF) && (colorA == colorH)) ||
+                    ((colorB == colorE) && (colorB == colorD) && (colorA != colorF) && (colorA == colorI)) )
+                    product = colorB;
+                else
+                    product = INTERPOLATE4444(colorA, colorB);
+
+                if (((colorC == colorH) && (colorA == colorF)) ||
+                    ((colorC == colorG) && (colorC == colorD) && (colorA != colorH) && (colorA == colorI)) )
+                    product1 = colorC;
+                else
+                    product1 = INTERPOLATE4444(colorA, colorC);
+                product2 = colorB;
+            }
+            else if ((colorA == colorD) && (colorB == colorC))    // both diagonals match
+            {
+                if (colorA == colorB)    // all four pixels are equal: plain replication
+                {
+                    product = colorA;
+                    product1 = colorA;
+                    product2 = colorA;
+                }
+                else
+                {
+                    s16 r = 0;    // vote total from the surrounding pixels: positive favours A, negative favours B
+                    product1 = INTERPOLATE4444(colorA, colorC);
+                    product = INTERPOLATE4444(colorA, colorB);
+
+                    r += GetResult1 (colorA, colorB, colorG, colorE, colorI);
+                    r += GetResult2 (colorB, colorA, colorK, colorF, colorJ);
+                    r += GetResult2 (colorB, colorA, colorH, colorN, colorM);
+                    r += GetResult1 (colorA, colorB, colorL, colorO, colorP);
+
+                    if (r > 0)
+                        product2 = colorA;
+                    else if (r < 0)
+                        product2 = colorB;
+                    else
+                        product2 = Q_INTERPOLATE4444(colorA, colorB, colorC, colorD);    // tie: blend all four
+                }
+            }
+            else    // neither diagonal matches
+            {
+                product2 = Q_INTERPOLATE4444(colorA, colorB, colorC, colorD);
+
+                if ((colorA == colorC) && (colorA == colorF) && (colorB != colorE) && (colorB == colorJ))
+                    product = colorA;
+                else if ((colorB == colorE) && (colorB == colorD) && (colorA != colorF) && (colorA == colorI))
+                    product = colorB;
+                else
+                    product = INTERPOLATE4444(colorA, colorB);
+
+                if ((colorA == colorB) && (colorA == colorH) && (colorG != colorC) && (colorC == colorM))
+                    product1 = colorA;
+                else if ((colorC == colorG) && (colorC == colorD) && (colorA != colorH) && (colorA == colorI))
+                    product1 = colorC;
+                else
+                    product1 = INTERPOLATE4444(colorA, colorC);
+            }
+
+            destPtr[0] = colorA;    // top-left of the 2x2 output cell is the source pixel itself
+            destPtr[1] = product;    // top-right
+            destPtr[destWidth] = product1;    // bottom-left (next output row)
+            destPtr[destWidth + 1] = product2;    // bottom-right
+
+            srcPtr++;
+            destPtr += 2;
+        }
+        destPtr += destWidth;    // skip the second output row, which was filled alongside the first
+    }
+}
+
+void _2xSaI5551( u16 *srcPtr, u16 *destPtr, u16 width, u16 height, s32 clampS, s32 clampT )    // Writes a (2*width) x (2*height) image to destPtr from the width x height 16-bit image at srcPtr; nonzero clampS/clampT repeat the edge column/row, zero wraps to the opposite edge
+{
+    u16 destWidth = width << 1;    // output row length in pixels
+    //u16 destHeight = height << 1;
+
+    u32 colorA, colorB, colorC, colorD,
+          colorE, colorF, colorG, colorH,
+          colorI, colorJ, colorK, colorL,
+          colorM, colorN, colorO, colorP;
+    u32 product, product1, product2;    // output pixels to the right of, below, and diagonally below-right of A
+
+    s16 row0, row1, row2, row3;    // element offsets from srcPtr to rows y-1, y, y+1, y+2; NOTE(review): s16, so (height - 1) * width must fit in 16 bits - confirm caller limits
+    s16 col0, col1, col2, col3;    // element offsets from srcPtr to columns x-1, x, x+1, x+2
+
+    for (u16 y = 0; y < height; y++)
+    {
+        if (y > 0)
+            row0 = -width;
+        else
+            row0 = clampT ? 0 : (height - 1) * width;    // first row: reuse it (clamp) or take the last row (wrap)
+
+        row1 = 0;
+
+        if (y < height - 1)
+        {
+            row2 = width;
+
+            if (y < height - 2)
+                row3 = width << 1;
+            else
+                row3 = clampT ? width : -y * width;    // second-to-last row: y+2 becomes the last row (clamp) or row 0 (wrap)
+        }
+        else
+        {
+            row2 = clampT ? 0 : -y * width;    // last row: y+1 becomes this row (clamp) or row 0 (wrap)
+            row3 = clampT ? 0 : (1 - y) * width;    // ... and y+2 becomes this row (clamp) or row 1 (wrap); NOTE(review): with height == 1 and no clamp this points one row past the image - confirm callers
+        }
+
+        for (u16 x = 0; x < width; x++)
+        {
+            if (x > 0)
+                col0 = -1;
+            else
+                col0 = clampS ? 0 : width - 1;    // first column: reuse it (clamp) or take the last column (wrap)
+
+            col1 = 0;
+
+            if (x < width - 1)
+            {
+                col2 = 1;
+
+                if (x < width - 2)
+                    col3 = 2;
+                else
+                    col3 = clampS ? 1 : -x;    // second-to-last column: x+2 becomes the last column (clamp) or column 0 (wrap)
+            }
+            else
+            {
+                col2 = clampS ? 0 : -x;    // last column: x+1 becomes this column (clamp) or column 0 (wrap)
+                col3 = clampS ? 0 : 1 - x;    // ... and x+2 becomes this column (clamp) or column 1 (wrap); NOTE(review): with width == 1 and no clamp this is one element past the row - confirm callers
+            }
+
+//---------------------------------------
+// Map of the pixels:                    I|E F|J
+//                                       G|A B|K
+//                                       H|C D|L
+//                                       M|N O|P
+            colorI = *(srcPtr + col0 + row0);
+            colorE = *(srcPtr + col1 + row0);
+            colorF = *(srcPtr + col2 + row0);
+            colorJ = *(srcPtr + col3 + row0);
+
+            colorG = *(srcPtr + col0 + row1);
+            colorA = *(srcPtr + col1 + row1);    // the source pixel being expanded
+            colorB = *(srcPtr + col2 + row1);
+            colorK = *(srcPtr + col3 + row1);
+
+            colorH = *(srcPtr + col0 + row2);
+            colorC = *(srcPtr + col1 + row2);
+            colorD = *(srcPtr + col2 + row2);
+            colorL = *(srcPtr + col3 + row2);
+
+            colorM = *(srcPtr + col0 + row3);
+            colorN = *(srcPtr + col1 + row3);
+            colorO = *(srcPtr + col2 + row3);
+            colorP = *(srcPtr + col3 + row3);
+
+            if ((colorA == colorD) && (colorB != colorC))    // only the A-D diagonal matches
+            {
+                if ( ((colorA == colorE) && (colorB == colorL)) ||
+                    ((colorA == colorC) && (colorA == colorF) && (colorB != colorE) && (colorB == colorJ)) )
+                    product = colorA;
+                else
+                    product = INTERPOLATE5551(colorA, colorB);
+
+                if (((colorA == colorG) && (colorC == colorO)) ||
+                    ((colorA == colorB) && (colorA == colorH) && (colorG != colorC) && (colorC == colorM)) )
+                    product1 = colorA;
+                else
+                    product1 = INTERPOLATE5551(colorA, colorC);
+
+                product2 = colorA;
+            }
+            else if ((colorB == colorC) && (colorA != colorD))    // only the B-C diagonal matches
+            {
+                if (((colorB == colorF) && (colorA == colorH)) ||
+                    ((colorB == colorE) && (colorB == colorD) && (colorA != colorF) && (colorA == colorI)) )
+                    product = colorB;
+                else
+                    product = INTERPOLATE5551(colorA, colorB);
+
+                if (((colorC == colorH) && (colorA == colorF)) ||
+                    ((colorC == colorG) && (colorC == colorD) && (colorA != colorH) && (colorA == colorI)) )
+                    product1 = colorC;
+                else
+                    product1 = INTERPOLATE5551(colorA, colorC);
+                product2 = colorB;
+            }
+            else if ((colorA == colorD) && (colorB == colorC))    // both diagonals match
+            {
+                if (colorA == colorB)    // all four pixels are equal: plain replication
+                {
+                    product = colorA;
+                    product1 = colorA;
+                    product2 = colorA;
+                }
+                else
+                {
+                    s16 r = 0;    // vote total from the surrounding pixels: positive favours A, negative favours B
+                    product1 = INTERPOLATE5551(colorA, colorC);
+                    product = INTERPOLATE5551(colorA, colorB);
+
+                    r += GetResult1 (colorA, colorB, colorG, colorE, colorI);
+                    r += GetResult2 (colorB, colorA, colorK, colorF, colorJ);
+                    r += GetResult2 (colorB, colorA, colorH, colorN, colorM);
+                    r += GetResult1 (colorA, colorB, colorL, colorO, colorP);
+
+                    if (r > 0)
+                        product2 = colorA;
+                    else if (r < 0)
+                        product2 = colorB;
+                    else
+                        product2 = Q_INTERPOLATE5551(colorA, colorB, colorC, colorD);    // tie: blend all four
+                }
+            }
+            else    // neither diagonal matches
+            {
+                product2 = Q_INTERPOLATE5551(colorA, colorB, colorC, colorD);
+
+                if ((colorA == colorC) && (colorA == colorF) && (colorB != colorE) && (colorB == colorJ))
+                    product = colorA;
+                else if ((colorB == colorE) && (colorB == colorD) && (colorA != colorF) && (colorA == colorI))
+                    product = colorB;
+                else
+                    product = INTERPOLATE5551(colorA, colorB);
+
+                if ((colorA == colorB) && (colorA == colorH) && (colorG != colorC) && (colorC == colorM))
+                    product1 = colorA;
+                else if ((colorC == colorG) && (colorC == colorD) && (colorA != colorH) && (colorA == colorI))
+                    product1 = colorC;
+                else
+                    product1 = INTERPOLATE5551(colorA, colorC);
+            }
+
+            destPtr[0] = colorA;    // top-left of the 2x2 output cell is the source pixel itself
+            destPtr[1] = product;    // top-right
+            destPtr[destWidth] = product1;    // bottom-left (next output row)
+            destPtr[destWidth + 1] = product2;    // bottom-right
+
+            srcPtr++;
+            destPtr += 2;
+        }
+        destPtr += destWidth;    // skip the second output row, which was filled alongside the first
+    }
+}
+
+void _2xSaI8888( u32 *srcPtr, u32 *destPtr, u16 width, u16 height, s32 clampS, s32 clampT )    // Writes a (2*width) x (2*height) image to destPtr from the width x height 32-bit image at srcPtr; nonzero clampS/clampT repeat the edge column/row, zero wraps to the opposite edge
+{
+    u16 destWidth = width << 1;    // output row length in pixels
+    //u16 destHeight = height << 1;
+
+    u32 colorA, colorB, colorC, colorD,
+          colorE, colorF, colorG, colorH,
+          colorI, colorJ, colorK, colorL,
+          colorM, colorN, colorO, colorP;
+    u32 product, product1, product2;    // output pixels to the right of, below, and diagonally below-right of A
+
+    s16 row0, row1, row2, row3;    // element offsets from srcPtr to rows y-1, y, y+1, y+2; NOTE(review): s16, so (height - 1) * width must fit in 16 bits - confirm caller limits
+    s16 col0, col1, col2, col3;    // element offsets from srcPtr to columns x-1, x, x+1, x+2
+
+    for (u16 y = 0; y < height; y++)
+    {
+        if (y > 0)
+            row0 = -width;
+        else
+            row0 = clampT ? 0 : (height - 1) * width;    // first row: reuse it (clamp) or take the last row (wrap)
+
+        row1 = 0;
+
+        if (y < height - 1)
+        {
+            row2 = width;
+
+            if (y < height - 2)
+                row3 = width << 1;
+            else
+                row3 = clampT ? width : -y * width;    // second-to-last row: y+2 becomes the last row (clamp) or row 0 (wrap)
+        }
+        else
+        {
+            row2 = clampT ? 0 : -y * width;    // last row: y+1 becomes this row (clamp) or row 0 (wrap)
+            row3 = clampT ? 0 : (1 - y) * width;    // ... and y+2 becomes this row (clamp) or row 1 (wrap); NOTE(review): with height == 1 and no clamp this points one row past the image - confirm callers
+        }
+
+        for (u16 x = 0; x < width; x++)
+        {
+            if (x > 0)
+                col0 = -1;
+            else
+                col0 = clampS ? 0 : width - 1;    // first column: reuse it (clamp) or take the last column (wrap)
+
+            col1 = 0;
+
+            if (x < width - 1)
+            {
+                col2 = 1;
+
+                if (x < width - 2)
+                    col3 = 2;
+                else
+                    col3 = clampS ? 1 : -x;    // second-to-last column: x+2 becomes the last column (clamp) or column 0 (wrap)
+            }
+            else
+            {
+                col2 = clampS ? 0 : -x;    // last column: x+1 becomes this column (clamp) or column 0 (wrap)
+                col3 = clampS ? 0 : 1 - x;    // ... and x+2 becomes this column (clamp) or column 1 (wrap); NOTE(review): with width == 1 and no clamp this is one element past the row - confirm callers
+            }
+
+//---------------------------------------
+// Map of the pixels:                    I|E F|J
+//                                       G|A B|K
+//                                       H|C D|L
+//                                       M|N O|P
+            colorI = *(srcPtr + col0 + row0);
+            colorE = *(srcPtr + col1 + row0);
+            colorF = *(srcPtr + col2 + row0);
+            colorJ = *(srcPtr + col3 + row0);
+
+            colorG = *(srcPtr + col0 + row1);
+            colorA = *(srcPtr + col1 + row1);    // the source pixel being expanded
+            colorB = *(srcPtr + col2 + row1);
+            colorK = *(srcPtr + col3 + row1);
+
+            colorH = *(srcPtr + col0 + row2);
+            colorC = *(srcPtr + col1 + row2);
+            colorD = *(srcPtr + col2 + row2);
+            colorL = *(srcPtr + col3 + row2);
+
+            colorM = *(srcPtr + col0 + row3);
+            colorN = *(srcPtr + col1 + row3);
+            colorO = *(srcPtr + col2 + row3);
+            colorP = *(srcPtr + col3 + row3);
+
+            if ((colorA == colorD) && (colorB != colorC))    // only the A-D diagonal matches
+            {
+                if ( ((colorA == colorE) && (colorB == colorL)) ||
+                    ((colorA == colorC) && (colorA == colorF) && (colorB != colorE) && (colorB == colorJ)) )
+                    product = colorA;
+                else
+                    product = INTERPOLATE8888(colorA, colorB);
+
+                if (((colorA == colorG) && (colorC == colorO)) ||
+                    ((colorA == colorB) && (colorA == colorH) && (colorG != colorC) && (colorC == colorM)) )
+                    product1 = colorA;
+                else
+                    product1 = INTERPOLATE8888(colorA, colorC);
+
+                product2 = colorA;
+            }
+            else if ((colorB == colorC) && (colorA != colorD))    // only the B-C diagonal matches
+            {
+                if (((colorB == colorF) && (colorA == colorH)) ||
+                    ((colorB == colorE) && (colorB == colorD) && (colorA != colorF) && (colorA == colorI)) )
+                    product = colorB;
+                else
+                    product = INTERPOLATE8888(colorA, colorB);
+
+                if (((colorC == colorH) && (colorA == colorF)) ||
+                    ((colorC == colorG) && (colorC == colorD) && (colorA != colorH) && (colorA == colorI)) )
+                    product1 = colorC;
+                else
+                    product1 = INTERPOLATE8888(colorA, colorC);
+                product2 = colorB;
+            }
+            else if ((colorA == colorD) && (colorB == colorC))    // both diagonals match
+            {
+                if (colorA == colorB)    // all four pixels are equal: plain replication
+                {
+                    product = colorA;
+                    product1 = colorA;
+                    product2 = colorA;
+                }
+                else
+                {
+                    s16 r = 0;    // vote total from the surrounding pixels: positive favours A, negative favours B
+                    product1 = INTERPOLATE8888(colorA, colorC);
+                    product = INTERPOLATE8888(colorA, colorB);
+
+                    r += GetResult1 (colorA, colorB, colorG, colorE, colorI);
+                    r += GetResult2 (colorB, colorA, colorK, colorF, colorJ);
+                    r += GetResult2 (colorB, colorA, colorH, colorN, colorM);
+                    r += GetResult1 (colorA, colorB, colorL, colorO, colorP);
+
+                    if (r > 0)
+                        product2 = colorA;
+                    else if (r < 0)
+                        product2 = colorB;
+                    else
+                        product2 = Q_INTERPOLATE8888(colorA, colorB, colorC, colorD);    // tie: blend all four
+                }
+            }
+            else    // neither diagonal matches
+            {
+                product2 = Q_INTERPOLATE8888(colorA, colorB, colorC, colorD);
+
+                if ((colorA == colorC) && (colorA == colorF) && (colorB != colorE) && (colorB == colorJ))
+                    product = colorA;
+                else if ((colorB == colorE) && (colorB == colorD) && (colorA != colorF) && (colorA == colorI))
+                    product = colorB;
+                else
+                    product = INTERPOLATE8888(colorA, colorB);
+
+                if ((colorA == colorB) && (colorA == colorH) && (colorG != colorC) && (colorC == colorM))
+                    product1 = colorA;
+                else if ((colorC == colorG) && (colorC == colorD) && (colorA != colorH) && (colorA == colorI))
+                    product1 = colorC;
+                else
+                    product1 = INTERPOLATE8888(colorA, colorC);
+            }
+
+            destPtr[0] = colorA;    // top-left of the 2x2 output cell is the source pixel itself
+            destPtr[1] = product;    // top-right
+            destPtr[destWidth] = product1;    // bottom-left (next output row)
+            destPtr[destWidth + 1] = product2;    // bottom-right
+
+            srcPtr++;
+            destPtr += 2;
+        }
+        destPtr += destWidth;    // skip the second output row, which was filled alongside the first
+    }
+}
+
diff --git a/source/gles2n64/src/2xSAI.h b/source/gles2n64/src/2xSAI.h
new file mode 100644 (file)
index 0000000..1b47cc9
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef _2XSAI_H
+#define _2XSAI_H
+#include "Types.h"
+
+void _2xSaI8888( u32 *srcPtr, u32 *destPtr, u16 width, u16 height, s32 clampS, s32 clampT );
+void _2xSaI4444( u16 *srcPtr, u16 *destPtr, u16 width, u16 height, s32 clampS, s32 clampT );
+void _2xSaI5551( u16 *srcPtr, u16 *destPtr, u16 width, u16 height, s32 clampS, s32 clampT );
+#endif
+
diff --git a/source/gles2n64/src/3DMath.cpp b/source/gles2n64/src/3DMath.cpp
new file mode 100644 (file)
index 0000000..bb684c7
--- /dev/null
@@ -0,0 +1,67 @@
+#include <math.h>
+
+static void MultMatrix_default( float m0[4][4], float m1[4][4],
+        float dest[4][4])
+{   // Portable fallback: dest[r][i] = sum over k of m0[k][i] * m1[r][k].
+    int i;  // index of the dest/m0 column handled by the current iteration
+    for (i = 0; i < 4; i++)
+    {   // dest must not alias m0 or m1: both inputs are read again after dest has been written.
+        dest[0][i] = m0[0][i]*m1[0][0] + m0[1][i]*m1[0][1] + m0[2][i]*m1[0][2] + m0[3][i]*m1[0][3];
+        dest[1][i] = m0[0][i]*m1[1][0] + m0[1][i]*m1[1][1] + m0[2][i]*m1[1][2] + m0[3][i]*m1[1][3];
+        dest[2][i] = m0[0][i]*m1[2][0] + m0[1][i]*m1[2][1] + m0[2][i]*m1[2][2] + m0[3][i]*m1[2][3];
+        dest[3][i] = m0[3][i]*m1[3][3] + m0[2][i]*m1[3][2] + m0[1][i]*m1[3][1] + m0[0][i]*m1[3][0];
+    }
+}
+
+static void TransformVectorNormalize_default(float vec[3], float mtx[4][4])
+{
+    // Transforms vec by the upper-left 3x3 of mtx (vec * M) and normalizes it in place.
+    // Snapshot the inputs first: writing vec[0] before vec[1]/vec[2] are computed
+    // would feed the already-transformed x into the other two components.
+    const float x = vec[0], y = vec[1], z = vec[2];
+    float len;
+
+    vec[0] = mtx[0][0] * x + mtx[1][0] * y + mtx[2][0] * z;
+    vec[1] = mtx[0][1] * x + mtx[1][1] * y + mtx[2][1] * z;
+    vec[2] = mtx[0][2] * x + mtx[1][2] * y + mtx[2][2] * z;
+
+    len = vec[0]*vec[0] + vec[1]*vec[1] + vec[2]*vec[2];
+    // A zero-length result has no direction; leave it as is instead of dividing by 0.
+    if (len != 0.0)
+    {
+        len = sqrtf(len);
+        vec[0] /= len;
+        vec[1] /= len;
+        vec[2] /= len;
+    }
+}
+
+static void Normalize_default(float v[3])
+{
+    // Scales v to unit length in place; a zero-length vector is left unchanged.
+    float norm = (float)(v[0]*v[0] + v[1]*v[1] + v[2]*v[2]);
+
+    if (norm == 0.0)
+        return;
+
+    norm = (float)sqrt( norm );
+    v[0] /= (float)norm;
+    v[1] /= (float)norm;
+    v[2] /= (float)norm;
+}
+
+static float DotProduct_default( float v0[3], float v1[3] )
+{
+    // Plain 3-component dot product, summed in x, y, z order.
+    const float sum = v0[0]*v1[0] + v0[1]*v1[1] + v0[2]*v1[2];
+    return sum;
+}
+
+// Public entry points: function pointers initially bound to the portable C versions above; MathInitNeon() in 3DMathNeon.cpp rebinds them.
+void (*MultMatrix)(float m0[4][4], float m1[4][4], float dest[4][4]) =
+        MultMatrix_default;
+void (*TransformVectorNormalize)(float vec[3], float mtx[4][4]) =
+        TransformVectorNormalize_default;
+void (*Normalize)(float v[3]) = Normalize_default;
+float (*DotProduct)(float v0[3], float v1[3]) = DotProduct_default;
+
diff --git a/source/gles2n64/src/3DMath.h b/source/gles2n64/src/3DMath.h
new file mode 100644 (file)
index 0000000..d639a97
--- /dev/null
@@ -0,0 +1,45 @@
+#ifndef _3DMATH_H
+#define _3DMATH_H
+
+#include <string.h>
+
+extern void (*MultMatrix)(float m0[4][4], float m1[4][4], float dest[4][4]);
+extern void (*TransformVectorNormalize)(float vec[3], float mtx[4][4]);
+extern void (*Normalize)(float v[3]);
+extern float (*DotProduct)(float v0[3], float v1[3]);
+
+inline void CopyMatrix( float m0[4][4], float m1[4][4] )
+{   // Copies all 16 floats of m1 (source) into m0 (destination).
+    memcpy( m0, m1, sizeof( float[4][4] ) );
+}
+
+inline void MultMatrix2( float m0[4][4], float m1[4][4] )
+{   // In-place variant: computes MultMatrix(m0, m1) into a temporary, then copies it over m0.
+    float product[4][4];
+    MultMatrix(m0, m1, product);
+    CopyMatrix(m0, product);
+}
+
+inline void Transpose3x3Matrix( float mtx[4][4] )
+{
+    // Transposes the upper-left 3x3 block in place by swapping each pair of
+    // off-diagonal elements; row 3 and column 3 are left untouched.
+    int row, col;
+
+    for (row = 0; row < 3; row++)
+    {
+        for (col = row + 1; col < 3; col++)
+        {
+            const float held = mtx[row][col];
+            mtx[row][col] = mtx[col][row];
+            mtx[col][row] = held;
+        }
+    }
+}
+
+#ifdef __NEON_OPT
+void MathInitNeon();
+#endif
+
+#endif
+
diff --git a/source/gles2n64/src/3DMathNeon.cpp b/source/gles2n64/src/3DMathNeon.cpp
new file mode 100644 (file)
index 0000000..41524b6
--- /dev/null
@@ -0,0 +1,133 @@
+#include "3DMath.h"
+
+static void MultMatrix_neon( float m0[4][4], float m1[4][4], float dest[4][4])
+{   // NEON version of MultMatrix: dest row r = sum over k of (m0 row k) * m1[r][k]. %0 = m0, %1 = m1, %2 = dest.
+    asm volatile (
+       "vld1.32                {d0, d1}, [%1]!                 \n\t"   //q0 = m1 row 0
+       "vld1.32                {d2, d3}, [%1]!         \n\t"   //q1 = m1 row 1
+       "vld1.32                {d4, d5}, [%1]!         \n\t"   //q2 = m1 row 2
+       "vld1.32                {d6, d7}, [%1]          \n\t"   //q3 = m1 row 3
+       "vld1.32                {d16, d17}, [%0]!               \n\t"   //q8 = m0 row 0
+       "vld1.32                {d18, d19}, [%0]!       \n\t"   //q9 = m0 row 1
+       "vld1.32                {d20, d21}, [%0]!       \n\t"   //q10 = m0 row 2
+       "vld1.32                {d22, d23}, [%0]        \n\t"   //q11 = m0 row 3
+
+       "vmul.f32               q12, q8, d0[0]                  \n\t"   //q12 = q8 * d0[0]
+       "vmul.f32               q13, q8, d2[0]              \n\t"       //q13 = q8 * d2[0]
+       "vmul.f32               q14, q8, d4[0]              \n\t"       //q14 = q8 * d4[0]
+       "vmul.f32               q15, q8, d6[0]                  \n\t"   //q15 = q8 * d6[0]
+       "vmla.f32               q12, q9, d0[1]                  \n\t"   //q12 += q9 * d0[1]
+       "vmla.f32               q13, q9, d2[1]              \n\t"       //q13 += q9 * d2[1]
+       "vmla.f32               q14, q9, d4[1]              \n\t"       //q14 += q9 * d4[1]
+       "vmla.f32               q15, q9, d6[1]              \n\t"       //q15 += q9 * d6[1]
+       "vmla.f32               q12, q10, d1[0]                 \n\t"   //q12 += q10 * d1[0]
+       "vmla.f32               q13, q10, d3[0]                 \n\t"   //q13 += q10 * d3[0]
+       "vmla.f32               q14, q10, d5[0]                 \n\t"   //q14 += q10 * d5[0]
+       "vmla.f32               q15, q10, d7[0]                 \n\t"   //q15 += q10 * d7[0]
+       "vmla.f32               q12, q11, d1[1]                 \n\t"   //q12 += q11 * d1[1]
+       "vmla.f32               q13, q11, d3[1]                 \n\t"   //q13 += q11 * d3[1]
+       "vmla.f32               q14, q11, d5[1]                 \n\t"   //q14 += q11 * d5[1]
+       "vmla.f32               q15, q11, d7[1]             \n\t"       //q15 += q11 * d7[1]
+
+       "vst1.32                {d24, d25}, [%2]!               \n\t"   //dest row 0 = q12
+       "vst1.32                {d26, d27}, [%2]!           \n\t"       //dest row 1 = q13
+       "vst1.32                {d28, d29}, [%2]!           \n\t"       //dest row 2 = q14
+       "vst1.32                {d30, d31}, [%2]            \n\t"       //dest row 3 = q15
+
+       :"+r"(m0), "+r"(m1), "+r"(dest):
+    : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
+    "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
+    "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31",
+    "memory"
+       );
+}
+
+static void TransformVectorNormalize_neon(float vec[3], float mtx[4][4])
+{   // NEON version: vec = normalize(vec * upper 3 rows of mtx), in place. %0 = mtx, %1 = vec. NOTE(review): no zero-length guard, unlike the C version.
+       asm volatile (
+       "vld1.32                {d0}, [%1]                      \n\t"   //d0 = {v.x, v.y}
+       "flds                   s2, [%1, #8]                    \n\t"   //d1[0] = v.z
+       "vld1.32                {d18, d19}, [%0]!               \n\t"   //q9 = mtx row 0
+       "vld1.32                {d20, d21}, [%0]!           \n\t"       //q10 = mtx row 1
+       "vld1.32                {d22, d23}, [%0]            \n\t"       //q11 = mtx row 2
+
+       "vmul.f32               q2, q9, d0[0]                   \n\t"   //q2 = q9 * v.x
+       "vmla.f32               q2, q10, d0[1]                  \n\t"   //q2 += q10 * v.y
+       "vmla.f32               q2, q11, d1[0]                  \n\t"   //q2 += q11 * v.z
+
+    "vmul.f32          d0, d4, d4                              \n\t"   //d0 = {x*x, y*y}
+       "vpadd.f32              d0, d0, d0                              \n\t"   //d0 = x*x + y*y (both lanes)
+    "vmla.f32          d0, d5, d5                              \n\t"   //d0[0] += z*z -> squared length
+
+       "vmov.f32               d1, d0                                  \n\t"   //d1 = squared length
+       "vrsqrte.f32    d0, d0                                  \n\t"   //d0 = ~ 1.0 / sqrt(d0)
+       "vmul.f32               d2, d0, d1                              \n\t"   //d2 = d0 * d1
+       "vrsqrts.f32    d3, d2, d0                              \n\t"   //d3 = (3 - d0 * d2) / 2
+       "vmul.f32               d0, d0, d3                              \n\t"   //d0 = d0 * d3 (refined estimate)
+       "vmul.f32               d2, d0, d1                              \n\t"   //d2 = d0 * d1
+       "vrsqrts.f32    d3, d2, d0                              \n\t"   //d3 = (3 - d0 * d2) / 2
+       "vmul.f32               d0, d0, d3                              \n\t"   //d0 = d0 * d3 (second refinement)
+
+       "vmul.f32               q2, q2, d0[0]                   \n\t"   //q2 = q2 * (1 / length)
+
+       "vst1.32                {d4}, [%1]                  \n\t"       //vec[0..1] = {x, y}
+       "fsts                   s10, [%1, #8]           \n\t"   //vec[2] = z
+       : "+r"(mtx): "r"(vec)
+    : "d0","d1","d2","d3","d4","d5","d18","d19","d20","d21","d22", "d23", "memory"   // d4/d5 (= q2) are written above and must be declared
+       );
+}
+
+static void Normalize_neon(float v[3])
+{   // NEON version of Normalize: scales v by a reciprocal-square-root estimate of its squared length. NOTE(review): no zero-length guard, unlike the C version.
+       asm volatile (
+       "vld1.32                {d4}, [%0]!                     \n\t"   //d4={x,y}, %0 advanced to &v[2]
+       "flds                   s10, [%0]               \n\t"   //d5[0] = z
+       "sub                    %0, %0, #8              \n\t"   //rewind %0 to &v[0]
+       "vmul.f32               d0, d4, d4                              \n\t"   //d0= d4*d4
+       "vpadd.f32              d0, d0, d0                              \n\t"   //d0 = d[0] + d[1]
+    "vmla.f32          d0, d5, d5                              \n\t"   //d0 = d0 + d5*d5
+
+       "vmov.f32               d1, d0                                  \n\t"   //d1 = d0
+       "vrsqrte.f32    d0, d0                                  \n\t"   //d0 = ~ 1.0 / sqrt(d0)
+       "vmul.f32               d2, d0, d1                              \n\t"   //d2 = d0 * d1
+       "vrsqrts.f32    d3, d2, d0                              \n\t"   //d3 = (3 - d0 * d2) / 2
+       "vmul.f32               d0, d0, d3                              \n\t"   //d0 = d0 * d3
+       "vmul.f32               d2, d0, d1                              \n\t"   //d2 = d0 * d1
+       "vrsqrts.f32    d3, d2, d0                              \n\t"   //d3 = (3 - d0 * d2) / 2
+       "vmul.f32               d0, d0, d3                              \n\t"   //d0 = d0 * d3
+
+       "vmul.f32               q2, q2, d0[0]                   \n\t"   //q2 = q2 * d0[0]
+       "vst1.32                {d4}, [%0]!                     \n\t"   //v[0..1] = {x, y}
+       "fsts                   s10, [%0]                       \n\t"   //v[2] = z
+
+       :"+r"(v) :
+    : "d0", "d1", "d2", "d3", "d4", "d5", "memory"
+       );
+}
+
+static float DotProduct_neon( float v0[3], float v1[3] )
+{   // NEON 3-component dot product of v0 and v1. %0 = result, %1 = v0, %2 = v1.
+    float dot;
+       asm volatile (
+       "vld1.32                {d8}, [%1]!                     \n\t"   //d8={x0,y0}
+       "vld1.32                {d10}, [%2]!            \n\t"   //d10={x1,y1}
+       "flds                   s18, [%1, #0]       \n\t"       //d9[0]={z0}
+       "flds                   s22, [%2, #0]       \n\t"       //d11[0]={z1}
+       "vmul.f32               d12, d8, d10            \n\t"   //d12 = {x0*x1, y0*y1}
+       "vpadd.f32              d12, d12, d12           \n\t"   //d12 = x0*x1 + y0*y1 (both lanes)
+       "vmla.f32               d12, d9, d11            \n\t"   //d12[0] += z0*z1
+    "fmrs              %0, s24                 \n\t"   //%0 = d12[0]
+       : "=r"(dot), "+r"(v0), "+r"(v1):
+    : "d8", "d9", "d10", "d11", "d12", "memory"
+    // "memory": the asm reads *v0 and *v1 through pointers, so earlier stores to them must be completed first.
+       );
+    return dot;
+}
+
+void MathInitNeon()
+{   // Rebinds the four 3DMath function pointers to the NEON implementations in this file.
+    DotProduct = DotProduct_neon;
+    Normalize = Normalize_neon;
+    TransformVectorNormalize = TransformVectorNormalize_neon;
+    MultMatrix = MultMatrix_neon;
+}
diff --git a/source/gles2n64/src/COPYING b/source/gles2n64/src/COPYING
new file mode 100644 (file)
index 0000000..518eaa2
--- /dev/null
@@ -0,0 +1,172 @@
+This directory contains the source code of gles2n64 ported to Android
+by yongzh (freeman.yong@gmail.com). The original source code is available at:
+
+http://code.google.com/p/gles2n64/
+
+
+
+                  GNU LESSER GENERAL PUBLIC LICENSE
+                       Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+
+  This version of the GNU Lesser General Public License incorporates
+the terms and conditions of version 3 of the GNU General Public
+License, supplemented by the additional permissions listed below.
+
+  0. Additional Definitions.
+
+  As used herein, "this License" refers to version 3 of the GNU Lesser
+General Public License, and the "GNU GPL" refers to version 3 of the GNU
+General Public License.
+
+  "The Library" refers to a covered work governed by this License,
+other than an Application or a Combined Work as defined below.
+
+  An "Application" is any work that makes use of an interface provided
+by the Library, but which is not otherwise based on the Library.
+Defining a subclass of a class defined by the Library is deemed a mode
+of using an interface provided by the Library.
+
+  A "Combined Work" is a work produced by combining or linking an
+Application with the Library.  The particular version of the Library
+with which the Combined Work was made is also called the "Linked
+Version".
+
+  The "Minimal Corresponding Source" for a Combined Work means the
+Corresponding Source for the Combined Work, excluding any source code
+for portions of the Combined Work that, considered in isolation, are
+based on the Application, and not on the Linked Version.
+
+  The "Corresponding Application Code" for a Combined Work means the
+object code and/or source code for the Application, including any data
+and utility programs needed for reproducing the Combined Work from the
+Application, but excluding the System Libraries of the Combined Work.
+
+  1. Exception to Section 3 of the GNU GPL.
+
+  You may convey a covered work under sections 3 and 4 of this License
+without being bound by section 3 of the GNU GPL.
+
+  2. Conveying Modified Versions.
+
+  If you modify a copy of the Library, and, in your modifications, a
+facility refers to a function or data to be supplied by an Application
+that uses the facility (other than as an argument passed when the
+facility is invoked), then you may convey a copy of the modified
+version:
+
+   a) under this License, provided that you make a good faith effort to
+   ensure that, in the event an Application does not supply the
+   function or data, the facility still operates, and performs
+   whatever part of its purpose remains meaningful, or
+
+   b) under the GNU GPL, with none of the additional permissions of
+   this License applicable to that copy.
+
+  3. Object Code Incorporating Material from Library Header Files.
+
+  The object code form of an Application may incorporate material from
+a header file that is part of the Library.  You may convey such object
+code under terms of your choice, provided that, if the incorporated
+material is not limited to numerical parameters, data structure
+layouts and accessors, or small macros, inline functions and templates
+(ten or fewer lines in length), you do both of the following:
+
+   a) Give prominent notice with each copy of the object code that the
+   Library is used in it and that the Library and its use are
+   covered by this License.
+
+   b) Accompany the object code with a copy of the GNU GPL and this license
+   document.
+
+  4. Combined Works.
+
+  You may convey a Combined Work under terms of your choice that,
+taken together, effectively do not restrict modification of the
+portions of the Library contained in the Combined Work and reverse
+engineering for debugging such modifications, if you also do each of
+the following:
+
+   a) Give prominent notice with each copy of the Combined Work that
+   the Library is used in it and that the Library and its use are
+   covered by this License.
+
+   b) Accompany the Combined Work with a copy of the GNU GPL and this license
+   document.
+
+   c) For a Combined Work that displays copyright notices during
+   execution, include the copyright notice for the Library among
+   these notices, as well as a reference directing the user to the
+   copies of the GNU GPL and this license document.
+
+   d) Do one of the following:
+
+       0) Convey the Minimal Corresponding Source under the terms of this
+       License, and the Corresponding Application Code in a form
+       suitable for, and under terms that permit, the user to
+       recombine or relink the Application with a modified version of
+       the Linked Version to produce a modified Combined Work, in the
+       manner specified by section 6 of the GNU GPL for conveying
+       Corresponding Source.
+
+       1) Use a suitable shared library mechanism for linking with the
+       Library.  A suitable mechanism is one that (a) uses at run time
+       a copy of the Library already present on the user's computer
+       system, and (b) will operate properly with a modified version
+       of the Library that is interface-compatible with the Linked
+       Version.
+
+   e) Provide Installation Information, but only if you would otherwise
+   be required to provide such information under section 6 of the
+   GNU GPL, and only to the extent that such information is
+   necessary to install and execute a modified version of the
+   Combined Work produced by recombining or relinking the
+   Application with a modified version of the Linked Version. (If
+   you use option 4d0, the Installation Information must accompany
+   the Minimal Corresponding Source and Corresponding Application
+   Code. If you use option 4d1, you must provide the Installation
+   Information in the manner specified by section 6 of the GNU GPL
+   for conveying Corresponding Source.)
+
+  5. Combined Libraries.
+
+  You may place library facilities that are a work based on the
+Library side by side in a single library together with other library
+facilities that are not Applications and are not covered by this
+License, and convey such a combined library under terms of your
+choice, if you do both of the following:
+
+   a) Accompany the combined library with a copy of the same work based
+   on the Library, uncombined with any other library facilities,
+   conveyed under the terms of this License.
+
+   b) Give prominent notice with the combined library that part of it
+   is a work based on the Library, and explaining where to find the
+   accompanying uncombined form of the same work.
+
+  6. Revised Versions of the GNU Lesser General Public License.
+
+  The Free Software Foundation may publish revised and/or new versions
+of the GNU Lesser General Public License from time to time. Such new
+versions will be similar in spirit to the present version, but may
+differ in detail to address new problems or concerns.
+
+  Each version is given a distinguishing version number. If the
+Library as you received it specifies that a certain numbered version
+of the GNU Lesser General Public License "or any later version"
+applies to it, you have the option of following the terms and
+conditions either of that published version or of any later version
+published by the Free Software Foundation. If the Library as you
+received it does not specify a version number of the GNU Lesser
+General Public License, you may choose any version of the GNU Lesser
+General Public License ever published by the Free Software Foundation.
+
+  If the Library as you received it specifies that a proxy can decide
+whether future versions of the GNU Lesser General Public License shall
+apply, that proxy's public statement of acceptance of any version is
+permanent authorization for you to choose that version for the
+Library.
diff --git a/source/gles2n64/src/CRC.cpp b/source/gles2n64/src/CRC.cpp
new file mode 100644 (file)
index 0000000..9ab38a4
--- /dev/null
@@ -0,0 +1,93 @@
+#include "Types.h"
+
+#define CRC32_POLYNOMIAL     0x04C11DB7
+
+#ifdef __CRC_OPT
+unsigned int CRCTable[ 256 * 4];    // four consecutive 256-entry tables for the 4-bytes-per-step loop
+#else
+unsigned int CRCTable[ 256 ];       // single table for the byte-at-a-time loop
+#endif
+
+u32 Reflect( u32 ref, char ch )
+{
+     // Returns the low `ch` bits of `ref` in reversed order:
+     // bit 0 swaps with bit ch-1, bit 1 with bit ch-2, etc.
+     u32 value = 0;
+
+     for (int i = 1; i < (ch + 1); i++)
+     {
+          if(ref & 1)
+               value |= (u32)1 << (ch - i);   // unsigned 1: a signed 1 << 31 overflows int
+          ref >>= 1;
+     }
+     return value;
+}
+
+void CRC_BuildTable()
+{
+    // Fills CRCTable[0..255] with the bit-reflected table for CRC32_POLYNOMIAL.
+    u32 crc;
+    for (int i = 0; i < 256; i++)
+    {
+        crc = Reflect( i, 8 ) << 24;
+        for (int j = 0; j < 8; j++)
+            crc = (crc << 1) ^ (crc & 0x80000000u ? CRC32_POLYNOMIAL : 0);   // unsigned mask: 1 << 31 overflows int
+
+        CRCTable[i] = Reflect( crc, 32 );
+    }
+
+#ifdef __CRC_OPT
+    // Derive tables 1..3 from table 0 for the 4-bytes-per-step loop in CRC_Calculate.
+    for (int i = 0; i < 256; i++)
+    {
+        for(int j = 0; j < 3; j++)
+        {
+            CRCTable[256*(j+1) + i] = (CRCTable[256*j + i]>>8) ^ CRCTable[CRCTable[256*j + i]&0xFF];
+        }
+    }
+#endif
+}
+
+u32 CRC_Calculate( u32 crc, void *buffer, u32 count )
+{
+    // Updates `crc` over `count` bytes of `buffer` using CRCTable, then XORs the
+    // incoming seed back into the result before returning it.
+    u8 *p = (u8*) buffer;
+    u32 orig = crc;
+
+#ifdef __CRC_OPT
+    while(count > 3)
+    {
+        // Build the word from bytes, low byte first: reading through (unsigned int*) p is undefined when p is misaligned.
+        crc ^= (u32)p[0] | ((u32)p[1] << 8) | ((u32)p[2] << 16) | ((u32)p[3] << 24); p += 4;
+        crc = CRCTable[3*256 + (crc&0xFF)]
+          ^ CRCTable[2*256 + ((crc>>8)&0xFF)]
+          ^ CRCTable[1*256 + ((crc>>16)&0xFF)]
+          ^ CRCTable[0*256 + ((crc>>24))];
+        count -= 4;
+    }
+#endif
+
+    while (count--)
+        crc = (crc >> 8) ^ CRCTable[(crc & 0xFF) ^ *p++];
+
+    return crc ^ orig;
+}
+
+u32 CRC_CalculatePalette( u32 crc, void *buffer, u32 count )
+{
+    // Hashes only the first 2 bytes of every 8-byte group in `buffer`;
+    // `count` is the number of groups. The incoming seed is XORed back
+    // into the result before it is returned.
+    const u8 *entry = (const u8*) buffer;
+    const u32 seed = crc;
+
+    for (; count != 0; count--, entry += 8)
+    {
+        crc = (crc >> 8) ^ CRCTable[(crc & 0xFF) ^ entry[0]];
+        crc = (crc >> 8) ^ CRCTable[(crc & 0xFF) ^ entry[1]];
+    }
+
+    return crc ^ seed;
+}
+
diff --git a/source/gles2n64/src/CRC.h b/source/gles2n64/src/CRC.h
new file mode 100644 (file)
index 0000000..cac7750
--- /dev/null
@@ -0,0 +1,7 @@
+#include "Types.h"
+
+void CRC_BuildTable();
+
+u32 CRC_Calculate( u32 crc, void *buffer, u32 count );
+u32 CRC_CalculatePalette( u32 crc, void *buffer, u32 count );
+
diff --git a/source/gles2n64/src/Common.h b/source/gles2n64/src/Common.h
new file mode 100755 (executable)
index 0000000..ac508cd
--- /dev/null
@@ -0,0 +1,38 @@
+#ifndef __COMMON_H__
+#define __COMMON_H__
+
+//#define PROFILE_GBI
+
+#define LOG_NONE       0
+#define LOG_ERROR   1
+#define LOG_MINIMAL    2
+#define LOG_WARNING 3
+#define LOG_VERBOSE 4
+
+#define LOG_LEVEL LOG_NONE
+
+# ifndef min
+#  define min(a,b) ((a) < (b) ? (a) : (b))
+# endif
+# ifndef max
+#  define max(a,b) ((a) > (b) ? (a) : (b))
+# endif
+
+
+#if LOG_LEVEL>0
+
+#include <android/log.h>
+
+#define LOG(A, ...) \
+    do { /* one statement, so LOG(...); is safe inside an unbraced if/else */ \
+        if ((A) <= LOG_LEVEL) \
+            __android_log_print(ANDROID_LOG_DEBUG, "gles2n64", __VA_ARGS__); \
+    } while (0)
+
+#else
+
+#define LOG(A, ...)
+
+#endif
+
+#endif
diff --git a/source/gles2n64/src/Config.cpp b/source/gles2n64/src/Config.cpp
new file mode 100644 (file)
index 0000000..c34ff6c
--- /dev/null
@@ -0,0 +1,307 @@
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ *   Mupen64plus - Config_nogui.cpp                                        *
+ *   Mupen64Plus homepage: http://code.google.com/p/mupen64plus/           *
+ *   Copyright (C) 2008 Tillin9                                            *
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ *   This program is distributed in the hope that it will be useful,       *
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
+ *   GNU General Public License for more details.                          *
+ *                                                                         *
+ *   You should have received a copy of the GNU General Public License     *
+ *   along with this program; if not, write to the                         *
+ *   Free Software Foundation, Inc.,                                       *
+ *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.          *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "Config.h"
+#include "gles2N64.h"
+#include "RSP.h"
+#include "Textures.h"
+#include "OpenGL.h"
+
+#include "Config.h"
+#include "Common.h"
+
+
+Config config;
+
+struct Option
+{   // One row of the config table; rows whose data is NULL are written as plain text lines.
+    const char* name;       // text written before '=' and matched (case-insensitively) when loading
+    int*  data;             // config field the value is stored in, or NULL for comment/blank rows
+    const int   initial;    // value assigned by Config_SetDefault()
+};
+
+
+#define CONFIG_VERSION 2
+
+Option configOptions[] =   // written to the config file in this order by Config_WriteConfig()
+{
+    {"#gles2n64 Graphics Plugin for N64", NULL, 0},
+    {"#by Orkin / glN64 developers and Adventus.", NULL, 0},
+
+    {"config version", &config.version, 0},
+    {"", NULL, 0},
+
+    {"#Window Settings:", NULL, 0},
+    {"window xpos", &config.window.xpos, 0},
+    {"window ypos", &config.window.ypos, 0},
+    {"window width", &config.window.width, 800},
+    {"window height", &config.window.height, 480},
+    {"window refwidth", &config.window.refwidth, 800},
+    {"window refheight", &config.window.refheight, 480},
+    {"", NULL, 0},
+
+    {"#Framebuffer Settings:",NULL,0},
+//    {"framebuffer enable", &config.framebuffer.enable, 0},
+    {"framebuffer bilinear", &config.framebuffer.bilinear, 0},
+    {"framebuffer width", &config.framebuffer.width, 400},
+    {"framebuffer height", &config.framebuffer.height, 240},
+//    {"framebuffer width", &config.framebuffer.width, 800},
+//    {"framebuffer height", &config.framebuffer.height, 480},
+    {"", NULL, 0},
+
+    {"#VI Settings:", NULL, 0},
+    {"video force", &config.video.force, 0},
+    {"video width", &config.video.width, 320},
+    {"video height", &config.video.height, 240},
+    {"", NULL, 0},
+
+    {"#Render Settings:", NULL, 0},
+    {"enable fog", &config.enableFog, 0},
+    {"enable primitive z", &config.enablePrimZ, 1},
+    {"enable lighting", &config.enableLighting, 1},
+    {"enable alpha test", &config.enableAlphaTest, 1},
+    {"enable clipping", &config.enableClipping, 0},
+    {"enable face culling", &config.enableFaceCulling, 1},
+    {"enable noise", &config.enableNoise, 0},
+    {"", NULL, 0},
+
+    {"#Texture Settings:", NULL, 0},
+    {"texture 2xSAI", &config.texture.sai2x, 0},
+    {"texture force bilinear", &config.texture.forceBilinear, 0},
+    {"texture max anisotropy", &config.texture.maxAnisotropy, 0},
+    {"texture use IA", &config.texture.useIA, 0},
+    {"texture fast CRC", &config.texture.fastCRC, 1},
+    {"texture pow2", &config.texture.pow2, 1},
+    {"", NULL, 0},
+
+    {"#Frame skip:", NULL, 0},
+    {"auto frameskip", &config.autoFrameSkip, 0},
+    {"max frameskip", &config.maxFrameSkip, 0},
+    {"target FPS", &config.targetFPS, 20},
+    {"frame render rate", &config.frameRenderRate, 1},
+    {"vertical sync", &config.verticalSync, 0},
+    {"", NULL, 0},
+
+    {"#Other Settings:", NULL, 0},
+    {"update mode", &config.updateMode, SCREEN_UPDATE_AT_VI_UPDATE },
+    {"ignore offscreen rendering", &config.ignoreOffscreenRendering, 0},
+    {"force screen clear", &config.forceBufferClear, 0},
+    {"flip vertical", &config.screen.flipVertical, 0},
+// paulscode: removed from pre-compile to a config option
+//// (part of the Galaxy S Zelda crash-fix)
+    {"tribuffer opt", &config.tribufferOpt, 1},
+//
+    {"", NULL, 0},
+
+    {"#Hack Settings:", NULL, 0},
+    {"hack banjo tooie", &config.hackBanjoTooie, 0},
+    {"hack zelda", &config.hackZelda, 0},
+    {"hack alpha", &config.hackAlpha, 0},
+    {"hack z", &config.zHack, 0},
+
+};
+
+const int configOptionsSize = sizeof(configOptions) / sizeof(Option);   // number of rows in configOptions
+
+void Config_WriteConfig(const char *filename)
+{   // Writes each configOptions row to `filename` as "name" or "name=value", one row per line.
+    config.version = CONFIG_VERSION;
+    FILE* f = fopen(filename, "w");
+    if (!f)
+    {
+        LOG(LOG_ERROR, "Could Not Open %s for writing\n", filename);
+        return;   // no stream to write to: fprintf()/fclose() on NULL would crash
+    }
+
+    for(int i=0; i<configOptionsSize; i++)
+    {
+        Option *o = &configOptions[i];
+        fprintf(f, "%s", o->name);
+        if (o->data) fprintf(f,"=%i", *(o->data));
+        fprintf(f, "\n");
+    }
+
+    fclose(f);
+}
+
+void Config_SetDefault()
+{   // Resets every option that is backed by a config field to its built-in initial value.
+    for (Option *opt = configOptions; opt != configOptions + configOptionsSize; ++opt)
+    {
+        if (!opt->data) continue;   // comment/blank rows have no storage
+        *(opt->data) = opt->initial;
+    }
+}
+
+void Config_SetOption(char* line, char* val)
+{   // Finds the first option named `line` (case-insensitive) and, if it has storage, sets it to atoi(val).
+    for(int idx=0; idx < configOptionsSize; idx++)
+    {
+        Option *entry = &configOptions[idx];
+        if (strcasecmp(line, entry->name) != 0)
+            continue;
+
+        if (entry->data)
+        {
+            const int parsed = atoi(val);
+            *(entry->data) = parsed;
+            LOG(LOG_VERBOSE, "Config Option: %s = %i\n", entry->name, parsed);
+        }
+        return;   // later rows with the same name are never consulted
+    }
+}
+
+void Config_LoadRomConfig(unsigned char* header)
+{   // Reads the ROM name and PAL/NTSC flag from `header`, then applies that ROM's overrides from gles2n64rom.conf.
+    char line[4096];
+
+    // get the name of the ROM: 20 bytes at header offset 0x20, trailing spaces removed
+    for (int i=0; i<20; i++) config.romName[i] = header[0x20+i];
+    config.romName[20] = '\0';
+    // Track the length explicitly so an empty or all-space name cannot index romName[-1].
+    size_t nameLen = strlen(config.romName);
+    while (nameLen > 0 && config.romName[nameLen-1] == ' ')
+        config.romName[--nameLen] = '\0';
+
+    switch(header[0x3e])
+    {
+        // PAL codes
+        case 0x44:
+        case 0x46:
+        case 0x49:
+        case 0x50:
+        case 0x53:
+        case 0x55:
+        case 0x58:
+        case 0x59:
+            config.romPAL = true;
+            break;
+
+        // NTSC codes
+        case 0x37:
+        case 0x41:
+        case 0x45:
+        case 0x4a:
+            config.romPAL = false;
+            break;
+
+        // Fallback for unknown codes
+        default:
+            config.romPAL = false;
+    }
+
+    LOG(LOG_MINIMAL, "Rom is %s\n", config.romPAL ? "PAL" : "NTSC");
+
+    const char *filename = ConfigGetSharedDataFilepath("gles2n64rom.conf");
+    FILE *f = fopen(filename,"r");
+    if (!f)
+    {
+        LOG(LOG_MINIMAL, "Could not find %s Rom settings file, using global.\n", filename);
+        return;
+    }
+    else
+    {
+        LOG(LOG_MINIMAL, "[gles2N64]: Searching %s Database for \"%s\" ROM\n", filename, config.romName);
+        bool isRom = false;
+        // Loop on fgets() itself: testing feof() first would process a stale buffer after the final read fails.
+        while (fgets(line, sizeof(line), f))
+        {
+            if (line[0] == '\n') continue;
+
+            if (strncmp(line,"rom name=", 9) == 0)
+            {
+                //Depending on the editor, end lines could be terminated by "LF" or "CRLF"
+                char* lf = strchr(line, '\n'); //Line Feed
+                char* cr = strchr(line, '\r'); //Carriage Return
+                if (lf) *lf='\0';
+                if (cr) *cr='\0';
+                isRom = (strcasecmp(config.romName, line+9) == 0);
+            }
+            else
+            {
+                if (isRom)
+                {
+                    char* val = strchr(line, '=');
+                    if (!val) continue;
+                    *val++ = '\0';
+                    Config_SetOption(line,val);
+                    LOG(LOG_MINIMAL, "%s = %s", line, val);
+                }
+            }
+        }
+    }
+
+    fclose(f);
+}
+
+void Config_LoadConfig()
+{   // Resets all options to their defaults, then overrides them from gles2n64.conf (written fresh if it cannot be opened).
+    FILE *f;
+    char line[4096];
+
+    // default configuration
+    Config_SetDefault();
+
+    // read configuration
+    const char *filename = ConfigGetSharedDataFilepath("gles2n64.conf");
+    f = fopen(filename, "r");
+    if (!f)
+    {
+        LOG(LOG_MINIMAL, "[gles2N64]: Couldn't open config file '%s' for reading: %s\n", filename, strerror( errno ) );
+        LOG(LOG_MINIMAL, "[gles2N64]: Attempting to write new Config \n");
+        Config_WriteConfig(filename);
+    }
+    else
+    {
+        LOG(LOG_MINIMAL, "[gles2n64]: Loading Config from %s \n", filename);
+
+        // Loop on fgets() itself: testing feof() first would process a stale buffer after the final read fails.
+        while (fgets( line, sizeof(line), f ))
+        {
+            char *val;
+
+            if (line[0] == '#' || line[0] == '\n')
+                continue;
+
+            val = strchr( line, '=' );
+            if (!val) continue;
+
+            *val++ = '\0';
+
+             Config_SetOption(line,val);
+        }
+
+        if (config.version < CONFIG_VERSION)
+        {
+            LOG(LOG_WARNING, "[gles2N64]: Wrong config version, rewriting config with defaults\n");
+            Config_SetDefault();
+            Config_WriteConfig(filename);
+        }
+
+        fclose(f);
+    }
+}
+
diff --git a/source/gles2n64/src/Config.h b/source/gles2n64/src/Config.h
new file mode 100644 (file)
index 0000000..57fcf3f
--- /dev/null
@@ -0,0 +1,79 @@
+#ifndef CONFIG_H
+#define CONFIG_H
+
+// Plugin-wide settings. A single global instance is declared below as `config`.
+// NOTE(review): fields are presumably filled in by Config_SetDefault() and
+// Config_SetOption() in Config.cpp; their bodies are not part of this header.
+struct Config
+{
+    // Compared against CONFIG_VERSION by Config_LoadConfig(); an older value
+    // causes the config file to be rewritten with defaults.
+    int     version;
+
+    struct
+    {
+        int flipVertical;
+    } screen;
+
+    struct
+    {
+        int xpos, ypos, width, height, refwidth, refheight;
+    } window;
+
+    struct
+    {
+        int enable, bilinear;
+        int xpos, ypos, width, height;
+    } framebuffer;
+
+    struct
+    {
+        int force, width, height;
+    } video;
+
+    struct
+    {
+        int maxAnisotropy;
+        int enableMipmap;
+        int forceBilinear;
+        int sai2x;
+        int useIA;
+        int fastCRC;
+        int pow2;
+    } texture;
+
+    int     logFrameRate;
+    int     updateMode;
+    int     forceBufferClear;
+    int     ignoreOffscreenRendering;
+    int     zHack;
+
+    int     autoFrameSkip;
+    int     maxFrameSkip;
+    int     targetFPS;
+    int     frameRenderRate;
+    int     verticalSync;
+
+    int     enableFog;
+    int     enablePrimZ;
+    // Read by F3DCBFD_Vtx() together with the G_LIGHTING geometry-mode flag.
+    int     enableLighting;
+    int     enableAlphaTest;
+    int     enableClipping;
+    int     enableFaceCulling;
+    int     enableNoise;
+
+// paulscode: removed from pre-compile to a config option
+//// (part of the Galaxy S Zelda crash-fix
+    int     tribufferOpt;
+//
+
+    int     hackBanjoTooie;
+    int     hackZelda;
+    int     hackAlpha;
+
+    bool    stretchVideo;
+    bool    romPAL;    //is the rom PAL
+    // Compared case-insensitively against section names by Config_LoadRomConfig().
+    char    romName[21];
+};
+
+extern Config config;
+
+void Config_LoadConfig();
+void Config_LoadRomConfig(unsigned char* header);
+#endif
+
diff --git a/source/gles2n64/src/Debug.h b/source/gles2n64/src/Debug.h
new file mode 100644 (file)
index 0000000..b1fcec2
--- /dev/null
@@ -0,0 +1,30 @@
+// Debug helpers. The whole header body is only active when DEBUG is defined,
+// so every use of these macros must itself be wrapped in #ifdef DEBUG.
+#if !defined( DEBUG_H ) && defined( DEBUG )
+#define DEBUG_H
+
+#include <stdio.h>
+
+// Severity bits (OR-ed with the category bits below to form a message type)
+#define     DEBUG_LOW       0x1000
+#define     DEBUG_MEDIUM    0x2000
+#define     DEBUG_HIGH      0x4000
+#define     DEBUG_DETAIL    0x8000
+
+// Category bits
+#define     DEBUG_HANDLED   0x0001
+#define     DEBUG_UNHANDLED 0x0002
+#define     DEBUG_IGNORED   0x0004
+#define     DEBUG_UNKNOWN   0x0008
+#define     DEBUG_ERROR     0x0010
+#define     DEBUG_COMBINE   0x0020
+#define     DEBUG_TEXTURE   0x0040
+#define     DEBUG_VERTEX    0x0080
+#define     DEBUG_TRIANGLE  0x0100
+#define     DEBUG_MATRIX    0x0200
+
+// These expand to nothing in this build.
+#define OpenDebugDlg()
+#define CloseDebugDlg()
+#define StartDump(filename)
+#define EndDump()
+// Prints straight to stdout; the `type` argument is ignored.
+// `## __VA_ARGS__` is the GNU extension that drops the comma when no
+// variadic arguments are supplied.
+#define DebugMsg(type, format, ... )  printf(format, ## __VA_ARGS__)
+#define DebugRSPState(pci, pc, cmd, w0, w1)
+
+#endif // DEBUG_H
+
diff --git a/source/gles2n64/src/DepthBuffer.cpp b/source/gles2n64/src/DepthBuffer.cpp
new file mode 100644 (file)
index 0000000..8f2ebc1
--- /dev/null
@@ -0,0 +1,165 @@
+#include <stdlib.h>
+#include "DepthBuffer.h"
+#include "Types.h"
+
+DepthBufferInfo depthBuffer;
+
+// Resets the global depth-buffer list to the empty state.
+// Does not free anything; use DepthBuffer_Destroy() to release nodes.
+void DepthBuffer_Init()
+{
+    depthBuffer.numBuffers = 0;
+    depthBuffer.top     = NULL;
+    depthBuffer.bottom  = NULL;
+    depthBuffer.current = NULL;
+}
+
+// Unlinks and frees the bottom node of the depth-buffer list.
+// Does nothing when the list is empty. If the removed node is the one
+// referenced by depthBuffer.current, that pointer is reset to NULL so it
+// never refers to freed memory.
+void DepthBuffer_RemoveBottom()
+{
+    DepthBuffer *oldBottom = depthBuffer.bottom;
+
+    if (oldBottom == NULL)
+        return;
+
+    DepthBuffer *newBottom = oldBottom->higher;
+
+    if (oldBottom == depthBuffer.top)
+        depthBuffer.top = NULL;
+
+    if (oldBottom == depthBuffer.current)
+        depthBuffer.current = NULL;
+
+    free( oldBottom );
+
+    depthBuffer.bottom = newBottom;
+
+    if (depthBuffer.bottom != NULL)
+        depthBuffer.bottom->lower = NULL;
+
+    depthBuffer.numBuffers--;
+}
+
+void DepthBuffer_Remove( DepthBuffer *buffer )
+{
+    if ((buffer == depthBuffer.bottom) &&
+        (buffer == depthBuffer.top))
+    {
+        depthBuffer.top = NULL;
+        depthBuffer.bottom = NULL;
+    }
+    else if (buffer == depthBuffer.bottom)
+    {
+        depthBuffer.bottom = buffer->higher;
+
+        if (depthBuffer.bottom)
+            depthBuffer.bottom->lower = NULL;
+    }
+    else if (buffer == depthBuffer.top)
+    {
+        depthBuffer.top = buffer->lower;
+
+        if (depthBuffer.top)
+            depthBuffer.top->higher = NULL;
+    }
+    else
+    {
+        buffer->higher->lower = buffer->lower;
+        buffer->lower->higher = buffer->higher;
+    }
+
+    free( buffer );
+    depthBuffer.numBuffers--;
+}
+
+// Removes the first node (searching from the bottom upwards) whose address
+// equals `address`. Does nothing when no node matches.
+void DepthBuffer_RemoveBuffer( u32 address )
+{
+    for (DepthBuffer *node = depthBuffer.bottom; node != NULL; node = node->higher)
+    {
+        if (node->address != address)
+            continue;
+
+        DepthBuffer_Remove( node );
+        return;
+    }
+}
+
+// Allocates a new node and links it in as the top of the depth-buffer list.
+// Returns the new node, or NULL when the allocation fails (the list is left
+// unchanged in that case). `address` and `cleared` start at 0; the caller is
+// expected to fill them in.
+DepthBuffer *DepthBuffer_AddTop()
+{
+    DepthBuffer *newtop = (DepthBuffer*)malloc( sizeof( DepthBuffer ) );
+
+    if (newtop == NULL)
+        return NULL;
+
+    newtop->address = 0;
+    newtop->cleared = 0;
+    newtop->lower = depthBuffer.top;
+    newtop->higher = NULL;
+
+    if (depthBuffer.top)
+        depthBuffer.top->higher = newtop;
+
+    // first node of an empty list is also the bottom
+    if (!depthBuffer.bottom)
+        depthBuffer.bottom = newtop;
+
+    depthBuffer.top = newtop;
+
+    depthBuffer.numBuffers++;
+
+    return newtop;
+}
+
+void DepthBuffer_MoveToTop( DepthBuffer *newtop )
+{
+    if (newtop == depthBuffer.top)
+        return;
+
+    if (newtop == depthBuffer.bottom)
+    {
+        depthBuffer.bottom = newtop->higher;
+        depthBuffer.bottom->lower = NULL;
+    }
+    else
+    {
+        newtop->higher->lower = newtop->lower;
+        newtop->lower->higher = newtop->higher;
+    }
+
+    newtop->higher = NULL;
+    newtop->lower = depthBuffer.top;
+    depthBuffer.top->higher = newtop;
+    depthBuffer.top = newtop;
+}
+
+// Frees every node of the depth-buffer list and leaves the list empty.
+// depthBuffer.current is reset as well, since every node it could refer to
+// has been freed.
+void DepthBuffer_Destroy()
+{
+    while (depthBuffer.bottom)
+        DepthBuffer_RemoveBottom();
+
+    depthBuffer.top = NULL;
+    depthBuffer.current = NULL;
+}
+
+void DepthBuffer_SetBuffer( u32 address )
+{
+    DepthBuffer *current = depthBuffer.top;
+
+    // Search through saved depth buffers
+    while (current != NULL)
+    {
+        if (current->address == address)
+        {
+            DepthBuffer_MoveToTop( current );
+            depthBuffer.current = current;
+            return;
+        }
+        current = current->lower;
+    }
+
+    current = DepthBuffer_AddTop();
+
+    current->address = address;
+    current->cleared = TRUE;
+
+    depthBuffer.current = current;
+}
+
+// Returns the first node, searching from the top downwards, whose address
+// equals `address`, or NULL when there is none. The list is not modified.
+DepthBuffer *DepthBuffer_FindBuffer( u32 address )
+{
+    DepthBuffer *node;
+
+    for (node = depthBuffer.top; node != NULL; node = node->lower)
+    {
+        if (node->address == address)
+            break;
+    }
+
+    // NULL here means the walk fell off the bottom of the list.
+    return node;
+}
+
diff --git a/source/gles2n64/src/DepthBuffer.h b/source/gles2n64/src/DepthBuffer.h
new file mode 100644 (file)
index 0000000..dd7d5e6
--- /dev/null
@@ -0,0 +1,28 @@
+#ifndef DEPTHBUFFER_H
+#define DEPTHBUFFER_H
+
+#include "Types.h"
+
+// One node of the doubly linked depth-buffer list kept in DepthBufferInfo.
+struct DepthBuffer
+{
+    // Neighbours towards the top / bottom of the list; NULL at either end.
+    DepthBuffer *higher, *lower;
+
+    // `address` is the lookup key used by DepthBuffer_SetBuffer/FindBuffer/RemoveBuffer;
+    // `cleared` is set to TRUE when the node is created by DepthBuffer_SetBuffer.
+    u32 address, cleared;
+};
+
+// List head for all DepthBuffer nodes; the single instance is `depthBuffer`.
+struct DepthBufferInfo
+{
+    // Ends of the list, plus the node last selected by DepthBuffer_SetBuffer()
+    // (all NULL after DepthBuffer_Init()).
+    DepthBuffer *top, *bottom, *current;
+    // Number of nodes currently linked into the list.
+    int numBuffers;
+};
+
+extern DepthBufferInfo depthBuffer;
+
+void DepthBuffer_Init();
+void DepthBuffer_Destroy();
+void DepthBuffer_SetBuffer( u32 address );
+void DepthBuffer_RemoveBuffer( u32 address );
+DepthBuffer *DepthBuffer_FindBuffer( u32 address );
+
+#endif
+
diff --git a/source/gles2n64/src/F3D.cpp b/source/gles2n64/src/F3D.cpp
new file mode 100644 (file)
index 0000000..a5524d0
--- /dev/null
@@ -0,0 +1,374 @@
+#include "gles2N64.h"
+#include "Debug.h"
+#include "F3D.h"
+#include "N64.h"
+#include "RSP.h"
+#include "RDP.h"
+#include "gSP.h"
+#include "gDP.h"
+#include "GBI.h"
+#include "OpenGL.h"
+#include "DepthBuffer.h"
+
+#include "Config.h"
+
+// Handler registered for F3D_SPNOOP; both command words are ignored.
+void F3D_SPNoOp( u32 w0, u32 w1 )
+{
+    gSPNoOp();
+}
+
+void F3D_Mtx( u32 w0, u32 w1 )
+{
+    if (_SHIFTR( w0, 0, 16 ) != 64)
+    {
+//      GBI_DetectUCode(); // Something's wrong
+#ifdef DEBUG
+    DebugMsg( DEBUG_MEDIUM | DEBUG_HIGH | DEBUG_ERROR, "G_MTX: address = 0x%08X    length = %i    params = 0x%02X\n", w1, _SHIFTR( w0, 0, 16 ), _SHIFTR( w0, 16, 8 ) );
+#endif
+        return;
+    }
+
+    gSPMatrix( w1, _SHIFTR( w0, 16, 8 ) );
+}
+
+// Handler registered for F3D_RESERVED0: the command is ignored, and its two
+// words are printed in DEBUG builds.
+void F3D_Reserved0( u32 w0, u32 w1 )
+{
+#ifdef DEBUG
+    // %lX requires unsigned long arguments; cast explicitly so the call is
+    // well-defined whatever the width of u32 is.
+    DebugMsg( DEBUG_MEDIUM | DEBUG_IGNORED | DEBUG_UNKNOWN, "G_RESERVED0: w0=0x%08lX w1=0x%08lX\n", (unsigned long)w0, (unsigned long)w1 );
+#endif
+}
+
+void F3D_MoveMem( u32 w0, u32 w1 )
+{
+#ifdef __TRIBUFFER_OPT
+    gSPFlushTriangles();
+#endif
+    switch (_SHIFTR( w0, 16, 8 ))
+    {
+        case F3D_MV_VIEWPORT://G_MV_VIEWPORT:
+            gSPViewport( w1 );
+            break;
+        case G_MV_MATRIX_1:
+            gSPForceMatrix( w1 );
+            // force matrix takes four commands
+            RSP.PC[RSP.PCi] += 24;
+            break;
+        case G_MV_L0:
+            gSPLight( w1, LIGHT_1 );
+            break;
+        case G_MV_L1:
+            gSPLight( w1, LIGHT_2 );
+            break;
+        case G_MV_L2:
+            gSPLight( w1, LIGHT_3 );
+            break;
+        case G_MV_L3:
+            gSPLight( w1, LIGHT_4 );
+            break;
+        case G_MV_L4:
+            gSPLight( w1, LIGHT_5 );
+            break;
+        case G_MV_L5:
+            gSPLight( w1, LIGHT_6 );
+            break;
+        case G_MV_L6:
+            gSPLight( w1, LIGHT_7 );
+            break;
+        case G_MV_L7:
+            gSPLight( w1, LIGHT_8 );
+            break;
+        case G_MV_LOOKATX:
+            break;
+        case G_MV_LOOKATY:
+            break;
+    }
+}
+
+// Handler registered for F3D_VTX: forwards w1 plus two 4-bit fields of w0
+// (bits 20..23, incremented by one, and bits 16..19) to gSPVertex().
+void F3D_Vtx( u32 w0, u32 w1 )
+{
+    const u32 n  = _SHIFTR( w0, 20, 4 ) + 1;
+    const u32 v0 = _SHIFTR( w0, 16, 4 );
+
+    gSPVertex( w1, n, v0 );
+}
+
+// Handler registered for F3D_RESERVED1; intentionally does nothing.
+void F3D_Reserved1( u32 w0, u32 w1 )
+{
+}
+
+void F3D_DList( u32 w0, u32 w1 )
+{
+    switch (_SHIFTR( w0, 16, 8 ))
+    {
+        case G_DL_PUSH:
+            gSPDisplayList( w1 );
+            break;
+        case G_DL_NOPUSH:
+            gSPBranchList( w1 );
+            break;
+    }
+
+#ifdef __TRIBUFFER_OPT
+    //since PCi can be changed in gSPDisplayList
+    gSPFlushTriangles();
+#endif
+}
+
+// Handler registered for F3D_RESERVED2; intentionally does nothing.
+void F3D_Reserved2( u32 w0, u32 w1 )
+{
+}
+
+// Handler registered for F3D_RESERVED3; intentionally does nothing.
+void F3D_Reserved3( u32 w0, u32 w1 )
+{
+}
+
+// Handler registered for F3D_SPRITE2D_BASE.
+// The gSPSprite2DBase() call is disabled; the handler only advances the
+// current program counter entry RSP.PC[RSP.PCi] by 8 bytes.
+void F3D_Sprite2D_Base( u32 w0, u32 w1 )
+{
+    //gSPSprite2DBase( w1 );
+    RSP.PC[RSP.PCi] += 8;
+}
+
+
+// Handler registered for F3D_TRI1.
+// The three low bytes of w1 are each divided by 10 and passed to
+// gSP1Triangle() as the triangle's vertex indices.
+void F3D_Tri1( u32 w0, u32 w1 )
+{
+    const u32 a = _SHIFTR( w1, 16, 8 ) / 10;
+    const u32 b = _SHIFTR( w1,  8, 8 ) / 10;
+    const u32 c = _SHIFTR( w1,  0, 8 ) / 10;
+
+    gSP1Triangle( a, b, c );
+}
+
+// Handler registered for F3D_CULLDL.
+// Both words are divided by 40 (w1 additionally decremented by one) before
+// being handed to gSPCullDisplayList().
+void F3D_CullDL( u32 w0, u32 w1 )
+{
+    const u32 v0 = _SHIFTR( w0, 0, 24 ) / 40;
+    const u32 vn = (w1 / 40) - 1;
+
+    gSPCullDisplayList( v0, vn );
+}
+
+// Handler registered for F3D_POPMTX: forwards w1 unchanged to gSPPopMatrix().
+void F3D_PopMtx( u32 w0, u32 w1 )
+{
+    gSPPopMatrix( w1 );
+}
+
+void F3D_MoveWord( u32 w0, u32 w1 )
+{
+    switch (_SHIFTR( w0, 0, 8 ))
+    {
+        case G_MW_MATRIX:
+            gSPInsertMatrix( _SHIFTR( w0, 8, 16 ), w1 );
+            break;
+
+        case G_MW_NUMLIGHT:
+            gSPNumLights( ((w1 - 0x80000000) >> 5) - 1 );
+            break;
+
+        case G_MW_CLIP:
+            gSPClipRatio( w1 );
+            break;
+
+        case G_MW_SEGMENT:
+            gSPSegment( _SHIFTR( w0, 8, 16 ) >> 2, w1 & 0x00FFFFFF );
+            break;
+
+        case G_MW_FOG:
+            gSPFogFactor( (s16)_SHIFTR( w1, 16, 16 ), (s16)_SHIFTR( w1, 0, 16 ) );
+            break;
+
+        case G_MW_LIGHTCOL:
+            switch (_SHIFTR( w0, 8, 16 ))
+            {
+                case F3D_MWO_aLIGHT_1:
+                    gSPLightColor( LIGHT_1, w1 );
+                    break;
+                case F3D_MWO_aLIGHT_2:
+                    gSPLightColor( LIGHT_2, w1 );
+                    break;
+                case F3D_MWO_aLIGHT_3:
+                    gSPLightColor( LIGHT_3, w1 );
+                    break;
+                case F3D_MWO_aLIGHT_4:
+                    gSPLightColor( LIGHT_4, w1 );
+                    break;
+                case F3D_MWO_aLIGHT_5:
+                    gSPLightColor( LIGHT_5, w1 );
+                    break;
+                case F3D_MWO_aLIGHT_6:
+                    gSPLightColor( LIGHT_6, w1 );
+                    break;
+                case F3D_MWO_aLIGHT_7:
+                    gSPLightColor( LIGHT_7, w1 );
+                    break;
+                case F3D_MWO_aLIGHT_8:
+                    gSPLightColor( LIGHT_8, w1 );
+                    break;
+            }
+            break;
+        case G_MW_POINTS:
+            gSPModifyVertex( _SHIFTR( w0, 8, 16 ) / 40, _SHIFTR( w0, 0, 8 ) % 40, w1 );
+            break;
+        case G_MW_PERSPNORM:
+            gSPPerspNormalize( w1 );
+            break;
+    }
+}
+
+void F3D_Texture( u32 w0, u32 w1 )
+{
+    gSPTexture( _FIXED2FLOAT( _SHIFTR( w1, 16, 16 ), 16 ),
+                _FIXED2FLOAT( _SHIFTR( w1, 0, 16 ), 16 ),
+                _SHIFTR( w0, 11, 3 ),
+                _SHIFTR( w0, 8, 3 ),
+                _SHIFTR( w0, 0, 8 ) );
+}
+
+void F3D_SetOtherMode_H( u32 w0, u32 w1 )
+{
+    switch (_SHIFTR( w0, 8, 8 ))
+    {
+        case G_MDSFT_PIPELINE:
+            gDPPipelineMode( w1 >> G_MDSFT_PIPELINE );
+            break;
+        case G_MDSFT_CYCLETYPE:
+            gDPSetCycleType( w1 >> G_MDSFT_CYCLETYPE );
+            break;
+        case G_MDSFT_TEXTPERSP:
+            gDPSetTexturePersp( w1 >> G_MDSFT_TEXTPERSP );
+            break;
+        case G_MDSFT_TEXTDETAIL:
+            gDPSetTextureDetail( w1 >> G_MDSFT_TEXTDETAIL );
+            break;
+        case G_MDSFT_TEXTLOD:
+            gDPSetTextureLOD( w1 >> G_MDSFT_TEXTLOD );
+            break;
+        case G_MDSFT_TEXTLUT:
+            gDPSetTextureLUT( w1 >> G_MDSFT_TEXTLUT );
+            break;
+        case G_MDSFT_TEXTFILT:
+            gDPSetTextureFilter( w1 >> G_MDSFT_TEXTFILT );
+            break;
+        case G_MDSFT_TEXTCONV:
+            gDPSetTextureConvert( w1 >> G_MDSFT_TEXTCONV );
+            break;
+        case G_MDSFT_COMBKEY:
+            gDPSetCombineKey( w1 >> G_MDSFT_COMBKEY );
+            break;
+        case G_MDSFT_RGBDITHER:
+            gDPSetColorDither( w1 >> G_MDSFT_RGBDITHER );
+            break;
+        case G_MDSFT_ALPHADITHER:
+            gDPSetAlphaDither( w1 >> G_MDSFT_ALPHADITHER );
+            break;
+        default:
+            u32 shift = _SHIFTR( w0, 8, 8 );
+            u32 length = _SHIFTR( w0, 0, 8 );
+            u32 mask = ((1 << length) - 1) << shift;
+
+            gDP.otherMode.h &= ~mask;
+            gDP.otherMode.h |= w1 & mask;
+
+            gDP.changed |= CHANGED_CYCLETYPE;
+            break;
+    }
+}
+
+void F3D_SetOtherMode_L( u32 w0, u32 w1 )
+{
+    switch (_SHIFTR( w0, 8, 8 ))
+    {
+        case G_MDSFT_ALPHACOMPARE:
+            gDPSetAlphaCompare( w1 >> G_MDSFT_ALPHACOMPARE );
+            break;
+        case G_MDSFT_ZSRCSEL:
+            gDPSetDepthSource( w1 >> G_MDSFT_ZSRCSEL );
+            break;
+        case G_MDSFT_RENDERMODE:
+            gDPSetRenderMode( w1 & 0xCCCCFFFF, w1 & 0x3333FFFF );
+            break;
+        default:
+            u32 shift = _SHIFTR( w0, 8, 8 );
+            u32 length = _SHIFTR( w0, 0, 8 );
+            u32 mask = ((1 << length) - 1) << shift;
+
+            gDP.otherMode.l &= ~mask;
+            gDP.otherMode.l |= w1 & mask;
+
+            gDP.changed |= CHANGED_RENDERMODE | CHANGED_ALPHACOMPARE;
+            break;
+    }
+}
+
+// Handler registered for F3D_ENDDL; both command words are ignored.
+void F3D_EndDL( u32 w0, u32 w1 )
+{
+    gSPEndDisplayList();
+}
+
+// Handler registered for F3D_SETGEOMETRYMODE: forwards w1 to gSPSetGeometryMode().
+void F3D_SetGeometryMode( u32 w0, u32 w1 )
+{
+    gSPSetGeometryMode( w1 );
+}
+
+// Handler registered for F3D_CLEARGEOMETRYMODE: forwards w1 to gSPClearGeometryMode().
+void F3D_ClearGeometryMode( u32 w0, u32 w1 )
+{
+    gSPClearGeometryMode( w1 );
+}
+
+// Unimplemented stub. It is not registered by F3D_Init() and its prototype
+// is commented out in F3D.h.
+void F3D_Line3D( u32 w0, u32 w1 )
+{
+    // Hmmm...
+}
+
+void F3D_Quad( u32 w0, u32 w1 )
+{
+    gSP1Quadrangle( _SHIFTR( w1, 24, 8 ) / 10, _SHIFTR( w1, 16, 8 ) / 10, _SHIFTR( w1, 8, 8 ) / 10, _SHIFTR( w1, 0, 8 ) / 10 );
+}
+
+// Handler registered for F3D_RDPHALF_1: latches w1 into gDP.half_1
+// (read later by F3DEX_Branch_Z and F3DEX_Load_uCode).
+void F3D_RDPHalf_1( u32 w0, u32 w1 )
+{
+    gDP.half_1 = w1;
+}
+
+// Handler registered for F3D_RDPHALF_2: latches w1 into gDP.half_2.
+void F3D_RDPHalf_2( u32 w0, u32 w1 )
+{
+    gDP.half_2 = w1;
+}
+
+// Handler registered for F3D_RDPHALF_CONT; intentionally does nothing.
+void F3D_RDPHalf_Cont( u32 w0, u32 w1 )
+{
+}
+
+void F3D_Tri4( u32 w0, u32 w1 )
+{
+    gSP4Triangles( _SHIFTR( w0,  0, 4 ), _SHIFTR( w1,  0, 4 ), _SHIFTR( w1,  4, 4 ),
+                   _SHIFTR( w0,  4, 4 ), _SHIFTR( w1,  8, 4 ), _SHIFTR( w1, 12, 4 ),
+                   _SHIFTR( w0,  8, 4 ), _SHIFTR( w1, 16, 4 ), _SHIFTR( w1, 20, 4 ),
+                   _SHIFTR( w0, 12, 4 ), _SHIFTR( w1, 24, 4 ), _SHIFTR( w1, 28, 4 ) );
+}
+
+// Sets up the GBI state for the F3D microcode: initialises the geometry-mode
+// flags, sets the PC stack size to 10 and registers one handler per command
+// value through GBI_SetGBI( command, value, function ).
+void F3D_Init()
+{
+    // Set GeometryMode flags
+    GBI_InitFlags( F3D );
+
+    GBI.PCStackSize = 10;
+
+    //          GBI Command             Command Value           Command Function
+    GBI_SetGBI( G_SPNOOP,               F3D_SPNOOP,             F3D_SPNoOp );
+    GBI_SetGBI( G_MTX,                  F3D_MTX,                F3D_Mtx );
+    GBI_SetGBI( G_RESERVED0,            F3D_RESERVED0,          F3D_Reserved0 );
+    GBI_SetGBI( G_MOVEMEM,              F3D_MOVEMEM,            F3D_MoveMem );
+    GBI_SetGBI( G_VTX,                  F3D_VTX,                F3D_Vtx );
+    GBI_SetGBI( G_RESERVED1,            F3D_RESERVED1,          F3D_Reserved1 );
+    GBI_SetGBI( G_DL,                   F3D_DL,                 F3D_DList );
+    GBI_SetGBI( G_RESERVED2,            F3D_RESERVED2,          F3D_Reserved2 );
+    GBI_SetGBI( G_RESERVED3,            F3D_RESERVED3,          F3D_Reserved3 );
+    GBI_SetGBI( G_SPRITE2D_BASE,        F3D_SPRITE2D_BASE,      F3D_Sprite2D_Base );
+
+    GBI_SetGBI( G_TRI1,                 F3D_TRI1,               F3D_Tri1 );
+    GBI_SetGBI( G_CULLDL,               F3D_CULLDL,             F3D_CullDL );
+    GBI_SetGBI( G_POPMTX,               F3D_POPMTX,             F3D_PopMtx );
+    GBI_SetGBI( G_MOVEWORD,             F3D_MOVEWORD,           F3D_MoveWord );
+    GBI_SetGBI( G_TEXTURE,              F3D_TEXTURE,            F3D_Texture );
+    GBI_SetGBI( G_SETOTHERMODE_H,       F3D_SETOTHERMODE_H,     F3D_SetOtherMode_H );
+    GBI_SetGBI( G_SETOTHERMODE_L,       F3D_SETOTHERMODE_L,     F3D_SetOtherMode_L );
+    GBI_SetGBI( G_ENDDL,                F3D_ENDDL,              F3D_EndDL );
+    GBI_SetGBI( G_SETGEOMETRYMODE,      F3D_SETGEOMETRYMODE,    F3D_SetGeometryMode );
+    GBI_SetGBI( G_CLEARGEOMETRYMODE,    F3D_CLEARGEOMETRYMODE,  F3D_ClearGeometryMode );
+    // 0xB5 is registered as QUAD here; F3D_Line3D is not registered.
+    GBI_SetGBI( G_QUAD,                 F3D_QUAD,               F3D_Quad );
+    GBI_SetGBI( G_RDPHALF_1,            F3D_RDPHALF_1,          F3D_RDPHalf_1 );
+    GBI_SetGBI( G_RDPHALF_2,            F3D_RDPHALF_2,          F3D_RDPHalf_2 );
+    GBI_SetGBI( G_RDPHALF_CONT,         F3D_RDPHALF_CONT,       F3D_RDPHalf_Cont );
+    GBI_SetGBI( G_TRI4,                 F3D_TRI4,               F3D_Tri4 );
+
+}
+
diff --git a/source/gles2n64/src/F3D.h b/source/gles2n64/src/F3D.h
new file mode 100644 (file)
index 0000000..6efb49c
--- /dev/null
@@ -0,0 +1,99 @@
+#ifndef F3D_H
+#define F3D_H
+#include "Types.h"
+
+#define F3D_MTX_STACKSIZE       10
+
+#define F3D_MTX_MODELVIEW       0x00
+#define F3D_MTX_PROJECTION      0x01
+#define F3D_MTX_MUL             0x00
+#define F3D_MTX_LOAD            0x02
+#define F3D_MTX_NOPUSH          0x00
+#define F3D_MTX_PUSH            0x04
+
+#define F3D_TEXTURE_ENABLE      0x00000002
+#define F3D_SHADING_SMOOTH      0x00000200
+#define F3D_CULL_FRONT          0x00001000
+#define F3D_CULL_BACK           0x00002000
+#define F3D_CULL_BOTH           0x00003000
+#define F3D_CLIPPING            0x00000000
+
+#define F3D_MV_VIEWPORT         0x80
+
+#define F3D_MWO_aLIGHT_1        0x00
+#define F3D_MWO_bLIGHT_1        0x04
+#define F3D_MWO_aLIGHT_2        0x20
+#define F3D_MWO_bLIGHT_2        0x24
+#define F3D_MWO_aLIGHT_3        0x40
+#define F3D_MWO_bLIGHT_3        0x44
+#define F3D_MWO_aLIGHT_4        0x60
+#define F3D_MWO_bLIGHT_4        0x64
+#define F3D_MWO_aLIGHT_5        0x80
+#define F3D_MWO_bLIGHT_5        0x84
+#define F3D_MWO_aLIGHT_6        0xa0
+#define F3D_MWO_bLIGHT_6        0xa4
+#define F3D_MWO_aLIGHT_7        0xc0
+#define F3D_MWO_bLIGHT_7        0xc4
+#define F3D_MWO_aLIGHT_8        0xe0
+#define F3D_MWO_bLIGHT_8        0xe4
+
+// FAST3D commands
+// Command values passed to GBI_SetGBI() by F3D_Init().
+#define F3D_SPNOOP              0x00
+#define F3D_MTX                 0x01
+#define F3D_RESERVED0           0x02
+#define F3D_MOVEMEM             0x03
+#define F3D_VTX                 0x04
+#define F3D_RESERVED1           0x05
+#define F3D_DL                  0x06
+#define F3D_RESERVED2           0x07
+#define F3D_RESERVED3           0x08
+#define F3D_SPRITE2D_BASE       0x09
+
+// The remaining commands count downwards from 0xBF.
+#define F3D_TRI1                0xBF
+#define F3D_CULLDL              0xBE
+#define F3D_POPMTX              0xBD
+#define F3D_MOVEWORD            0xBC
+#define F3D_TEXTURE             0xBB
+#define F3D_SETOTHERMODE_H      0xBA
+#define F3D_SETOTHERMODE_L      0xB9
+#define F3D_ENDDL               0xB8
+#define F3D_SETGEOMETRYMODE     0xB7
+#define F3D_CLEARGEOMETRYMODE   0xB6
+// LINE3D would share the value 0xB5 with QUAD, hence it is disabled.
+//#define F3D_LINE3D                0xB5 // Only used in Line3D
+#define F3D_QUAD                0xB5
+#define F3D_RDPHALF_1           0xB4
+#define F3D_RDPHALF_2           0xB3
+#define F3D_RDPHALF_CONT        0xB2
+#define F3D_TRI4                0xB1
+
+// Not referenced by F3D_Init().
+#define F3D_TRI_UNKNOWN         0xC0
+
+void F3D_SPNoOp( u32 w0, u32 w1 );
+void F3D_Mtx( u32 w0, u32 w1 );
+void F3D_Reserved0( u32 w0, u32 w1 );
+void F3D_MoveMem( u32 w0, u32 w1 );
+void F3D_Vtx( u32 w0, u32 w1 );
+void F3D_Reserved1( u32 w0, u32 w1 );
+void F3D_DList( u32 w0, u32 w1 );
+void F3D_Reserved2( u32 w0, u32 w1 );
+void F3D_Reserved3( u32 w0, u32 w1 );
+void F3D_Sprite2D_Base( u32 w0, u32 w1 );
+void F3D_Tri1( u32 w0, u32 w1 );
+void F3D_CullDL( u32 w0, u32 w1 );
+void F3D_PopMtx( u32 w0, u32 w1 );
+void F3D_MoveWord( u32 w0, u32 w1 );
+void F3D_Texture( u32 w0, u32 w1 );
+void F3D_SetOtherMode_H( u32 w0, u32 w1 );
+void F3D_SetOtherMode_L( u32 w0, u32 w1 );
+void F3D_EndDL( u32 w0, u32 w1 );
+void F3D_SetGeometryMode( u32 w0, u32 w1 );
+void F3D_ClearGeometryMode( u32 w0, u32 w1 );
+//void F3D_Line3D( u32 w0, u32 w1 );
+void F3D_Quad( u32 w0, u32 w1 );
+void F3D_RDPHalf_1( u32 w0, u32 w1 );
+void F3D_RDPHalf_2( u32 w0, u32 w1 );
+void F3D_RDPHalf_Cont( u32 w0, u32 w1 );
+void F3D_Tri4( u32 w0, u32 w1 );
+void F3D_Init();
+#endif
+
diff --git a/source/gles2n64/src/F3DCBFD.cpp b/source/gles2n64/src/F3DCBFD.cpp
new file mode 100644 (file)
index 0000000..d96449b
--- /dev/null
@@ -0,0 +1,212 @@
+#include "Common.h"
+#include "gles2N64.h"
+#include "Debug.h"
+#include "F3D.h"
+#include "F3DEX.h"
+#include "F3DEX2.h"
+#include "F3DCBFD.h"
+#include "S2DEX.h"
+#include "S2DEX2.h"
+#include "N64.h"
+#include "RSP.h"
+#include "RDP.h"
+#include "gSP.h"
+#include "gDP.h"
+#include "GBI.h"
+#include "OpenGL.h"
+#include "Config.h"
+
+//BASED ON GLIDE64 Implementation
+
+u32 normal_address = 0;
+
+// Vertex-load handler registered for F3DEX2_VTX by F3DCBFD_Init().
+// Reads n vertices from RDRAM at the physical address derived from w1,
+// copies position and texture coordinates into OGL.triangles.vertices,
+// optionally fetches normals from the table at normal_address, runs
+// gSPProcessVertex() and then applies the per-vertex colour.
+void F3DCBFD_Vtx(u32 w0, u32 w1)
+{
+
+       s32 v0, n;
+    u32 address;
+       // n: bits 12..19 of w0; v0: bits 1..7 of w0 minus n
+       n = (w0 >> 12)&0xFF;
+       v0 = ((w0 >> 1)&0x7F) - n;
+       address = RSP_SegmentToPhysical(w1);
+
+       // a negative start index means the command is malformed: ignore it
+       if (v0 < 0)
+       {
+               return;
+       }
+
+    gSPFlushTriangles();
+
+    // NOTE(review): address is not range-checked against the size of RDRAM here.
+    Vertex* vertex = (Vertex*)&RDRAM[address];
+
+       for (s32 i=0; i < n; i++)
+       {
+        u32 v;
+        // NOTE(review): the destination index is derived from i only; v0 is not
+        // added in either branch - confirm that this is intended.
+#ifdef __TRIBUFFER_OPT
+        v = __indexmap_getnew(i, 1);
+#else
+        v = i;
+#endif
+
+               OGL.triangles.vertices[v].x = vertex->x;
+               OGL.triangles.vertices[v].y = vertex->y;
+               OGL.triangles.vertices[v].z = vertex->z;
+               OGL.triangles.vertices[v].w = 1.0f;
+
+               OGL.triangles.vertices[v].s = _FIXED2FLOAT(vertex->s, 5);
+               OGL.triangles.vertices[v].t = _FIXED2FLOAT(vertex->t, 5);
+
+        if (config.enableLighting && gSP.geometryMode & G_LIGHTING)
+               {
+                       // nx/ny come from the separate normal table (byte index XOR 3),
+                       // nz from the low byte of the vertex flag.
+                       // NOTE(review): the table offset advances by 4 bytes per vertex
+                       // (i<<2) although only two bytes are read per vertex - confirm
+                       // against the reference implementation.
+                       OGL.triangles.vertices[v].nx = ((s8*)RDRAM)[(normal_address + (i<<2) + (v0<<1) + 0)^3];
+                       OGL.triangles.vertices[v].ny = ((s8*)RDRAM)[(normal_address + (i<<2) + (v0<<1) + 1)^3];
+                       OGL.triangles.vertices[v].nz = (s8)(vertex->flag&0xff);
+               }
+
+        gSPProcessVertex(v);
+
+        // 0.0039215689f is 1/255: colour components are scaled to 0..1.
+        if (config.enableLighting && gSP.geometryMode & G_LIGHTING)
+               {
+            // lit: modulate the r/g/b/a left by gSPProcessVertex() with the vertex colour
+            OGL.triangles.vertices[v].r = OGL.triangles.vertices[v].r * vertex->color.r * 0.0039215689f;
+            OGL.triangles.vertices[v].g = OGL.triangles.vertices[v].g * vertex->color.g * 0.0039215689f;
+            OGL.triangles.vertices[v].b = OGL.triangles.vertices[v].b * vertex->color.b * 0.0039215689f;
+            OGL.triangles.vertices[v].a = OGL.triangles.vertices[v].a * vertex->color.a * 0.0039215689f;
+               }
+               else
+               {
+            // unlit: use the vertex colour directly
+            OGL.triangles.vertices[v].r = vertex->color.r * 0.0039215689f;
+            OGL.triangles.vertices[v].g = vertex->color.g * 0.0039215689f;
+            OGL.triangles.vertices[v].b = vertex->color.b * 0.0039215689f;
+            OGL.triangles.vertices[v].a = vertex->color.a * 0.0039215689f;
+               }
+               vertex++;
+    }
+}
+
+void F3DCBFD_MoveWord(u32 w0, u32 w1)
+{
+       u8 index = (u8)((w0 >> 16) & 0xFF);
+       u16 offset = (u16)(w0 & 0xFFFF);
+
+       switch (index)
+       {
+        case G_MW_NUMLIGHT:
+            gSPNumLights(w1 / 48);
+            break;
+
+        case G_MW_CLIP:
+            if (offset == 0x04)
+            {
+                gSPClipRatio( w1 );
+            }
+            break;
+
+        case G_MW_SEGMENT:
+            gSPSegment(_SHIFTR(offset, 2, 4), w1);
+            break;
+
+        case G_MW_FOG:
+            gSPFogFactor( (s16)_SHIFTR( w1, 16, 16 ), (s16)_SHIFTR( w1, 0, 16 ) );
+            break;
+
+        case G_MV_COORDMOD:  // moveword coord mod
+            break;
+
+        default:
+            break;
+    }
+}
+
+#define F3DCBFD_MV_VIEWPORT     8
+#define F3DCBFD_MV_LIGHT        10
+#define F3DCBFD_MV_NORMAL       14
+
+void F3DCBFD_MoveMem(u32 w0, u32 w1)
+{
+#ifdef __TRIBUFFER_OPT
+    gSPFlushTriangles();
+#endif
+    switch (_SHIFTR( w0, 0, 8 ))
+    {
+        case F3DCBFD_MV_VIEWPORT:
+            gSPViewport(w1);
+            break;
+
+        case F3DCBFD_MV_LIGHT:
+        {
+            u32 offset = _SHIFTR( w0, 8, 8 ) << 3;
+            if (offset >= 48)
+            {
+                gSPLight( w1, (offset - 24) / 24);
+            }
+            break;
+        }
+
+        case F3DCBFD_MV_NORMAL:
+                       normal_address = RSP_SegmentToPhysical(w1);
+            break;
+
+    }
+}
+
+void F3DCBFD_Tri4(u32 w0, u32 w1)
+{
+    gSP4Triangles( _SHIFTR(w0, 23, 5), _SHIFTR(w0, 18, 5), (_SHIFTR(w0, 15, 3 ) << 2) | _SHIFTR(w1, 30, 2),
+                   _SHIFTR(w0, 10, 5), _SHIFTR(w0, 5, 5), _SHIFTR(w1, 0, 5),
+                   _SHIFTR(w1, 25, 5), _SHIFTR(w1, 20, 5), _SHIFTR(w1, 15, 5),
+                   _SHIFTR(w1, 10, 5), _SHIFTR(w1, 5, 5), _SHIFTR(w1, 0, 5));
+}
+
+
+// Sets up the GBI state for the CBFD microcode: F3DEX2 geometry-mode flags,
+// a PC stack size of 10 and the command table. Most commands reuse the
+// F3D/F3DEX/F3DEX2/S2DEX handlers; vertex, move-word, move-mem and TRI4 use
+// the F3DCBFD_* handlers defined in this file.
+void F3DCBFD_Init()
+{
+    LOG(LOG_VERBOSE, "USING CBFD ucode!\n");
+
+    // Set GeometryMode flags
+    GBI_InitFlags(F3DEX2);
+
+    GBI.PCStackSize = 10;
+
+    // GBI Command                      Command Value               Command Function
+    GBI_SetGBI( G_RDPHALF_2,            F3DEX2_RDPHALF_2,           F3D_RDPHalf_2 );
+    GBI_SetGBI( G_SETOTHERMODE_H,       F3DEX2_SETOTHERMODE_H,      F3DEX2_SetOtherMode_H );
+    GBI_SetGBI( G_SETOTHERMODE_L,       F3DEX2_SETOTHERMODE_L,      F3DEX2_SetOtherMode_L );
+    GBI_SetGBI( G_RDPHALF_1,            F3DEX2_RDPHALF_1,           F3D_RDPHalf_1 );
+    GBI_SetGBI( G_SPNOOP,               F3DEX2_SPNOOP,              F3D_SPNoOp );
+    GBI_SetGBI( G_ENDDL,                F3DEX2_ENDDL,               F3D_EndDL );
+    GBI_SetGBI( G_DL,                   F3DEX2_DL,                  F3D_DList );
+    GBI_SetGBI( G_LOAD_UCODE,           F3DEX2_LOAD_UCODE,          F3DEX_Load_uCode );
+    GBI_SetGBI( G_MOVEMEM,              F3DEX2_MOVEMEM,             F3DCBFD_MoveMem);
+    GBI_SetGBI( G_MOVEWORD,             F3DEX2_MOVEWORD,            F3DCBFD_MoveWord);
+    GBI_SetGBI( G_MTX,                  F3DEX2_MTX,                 F3DEX2_Mtx );
+    GBI_SetGBI( G_GEOMETRYMODE,         F3DEX2_GEOMETRYMODE,        F3DEX2_GeometryMode );
+    GBI_SetGBI( G_POPMTX,               F3DEX2_POPMTX,              F3DEX2_PopMtx );
+    GBI_SetGBI( G_TEXTURE,              F3DEX2_TEXTURE,             F3DEX2_Texture );
+    GBI_SetGBI( G_DMA_IO,               F3DEX2_DMA_IO,              F3DEX2_DMAIO );
+    GBI_SetGBI( G_SPECIAL_1,            F3DEX2_SPECIAL_1,           F3DEX2_Special_1 );
+    GBI_SetGBI( G_SPECIAL_2,            F3DEX2_SPECIAL_2,           F3DEX2_Special_2 );
+    GBI_SetGBI( G_SPECIAL_3,            F3DEX2_SPECIAL_3,           F3DEX2_Special_3 );
+
+
+
+    GBI_SetGBI(G_VTX,                   F3DEX2_VTX,                 F3DCBFD_Vtx);
+    GBI_SetGBI(G_MODIFYVTX,             F3DEX2_MODIFYVTX,           F3DEX_ModifyVtx);
+    GBI_SetGBI(G_CULLDL,                F3DEX2_CULLDL,              F3DEX_CullDL);
+    GBI_SetGBI(G_BRANCH_Z,              F3DEX2_BRANCH_Z,            F3DEX_Branch_Z);
+    GBI_SetGBI(G_TRI1,                  F3DEX2_TRI1,                F3DEX2_Tri1);
+    GBI_SetGBI(G_TRI2,                  F3DEX2_TRI2,                F3DEX_Tri2);
+    GBI_SetGBI(G_QUAD,                  F3DEX2_QUAD,                F3DEX2_Quad);
+//  GBI_SetGBI( G_LINE3D,               F3DEX2_LINE3D,              F3DEX2_Line3D );
+
+    //for some reason glide64 maps TRI4 to these locations:
+
+    // all sixteen command values 0x10..0x1F dispatch to the same TRI4 handler
+    for(int i = 0x10; i <= 0x1F; i++)
+    {
+        GBI_SetGBI(G_TRI4, i, F3DCBFD_Tri4);
+    }
+
+    GBI_SetGBI( G_BG_1CYC,              S2DEX2_BG_1CYC,             S2DEX_BG_1Cyc);
+    GBI_SetGBI( G_BG_COPY,              S2DEX2_BG_COPY,             S2DEX_BG_Copy);
+    GBI_SetGBI( G_OBJ_RENDERMODE,       S2DEX2_OBJ_RENDERMODE,      S2DEX_Obj_RenderMode);
+
+}
+
diff --git a/source/gles2n64/src/F3DCBFD.h b/source/gles2n64/src/F3DCBFD.h
new file mode 100644 (file)
index 0000000..8984135
--- /dev/null
@@ -0,0 +1,7 @@
+#ifndef F3DCBFD_H
+#define F3DCBFD_H
+
+
+void F3DCBFD_Init();
+#endif
+
diff --git a/source/gles2n64/src/F3DDKR.cpp b/source/gles2n64/src/F3DDKR.cpp
new file mode 100644 (file)
index 0000000..6d4afe5
--- /dev/null
@@ -0,0 +1,124 @@
+#include "gles2N64.h"
+#include "Debug.h"
+#include "F3D.h"
+#include "F3DDKR.h"
+#include "N64.h"
+#include "RSP.h"
+#include "RDP.h"
+#include "gSP.h"
+#include "gDP.h"
+#include "GBI.h"
+#include "OpenGL.h"
+
+
+void F3DDKR_DMA_Mtx( u32 w0, u32 w1 )
+{
+    if (_SHIFTR( w0, 0, 16 ) != 64)
+    {
+//      GBI_DetectUCode(); // Something's wrong
+#ifdef DEBUG
+    DebugMsg( DEBUG_MEDIUM | DEBUG_HIGH | DEBUG_ERROR, "G_MTX: address = 0x%08X    length = %i    params = 0x%02X\n", w1, _SHIFTR( w0, 0, 16 ), _SHIFTR( w0, 16, 8 ) );
+#endif
+        return;
+    }
+
+    u32 index = _SHIFTR( w0, 16, 4 );
+    u32 multiply;
+
+    if (index == 0) // DKR
+    {
+        index = _SHIFTR( w0, 22, 2 );
+        multiply = 0;
+    }
+    else // Gemini
+    {
+        multiply = _SHIFTR( w0, 23, 1 );
+    }
+
+    gSPDMAMatrix( w1, index, multiply );
+}
+
+void F3DDKR_DMA_Vtx( u32 w0, u32 w1 )
+{
+    if ((w0 & F3DDKR_VTX_APPEND))
+    {
+        if (gSP.matrix.billboard)
+            gSP.vertexi = 1;
+    }
+    else
+        gSP.vertexi = 0;
+
+    u32 n = _SHIFTR( w0, 19, 5 ) + 1;
+
+    gSPDMAVertex( w1, n, gSP.vertexi + _SHIFTR( w0, 9, 5 ) );
+
+    gSP.vertexi += n;
+}
+
+// Handler registered for F3DDKR_DMA_TRI: passes w1 and bits 4..15 of w0 to
+// gSPDMATriangles(), then resets the running vertex index.
+void F3DDKR_DMA_Tri( u32 w0, u32 w1 )
+{
+    const u32 n = _SHIFTR( w0, 4, 12 );
+
+    gSPDMATriangles( w1, n );
+    gSP.vertexi = 0;
+}
+
+// Handler registered for F3DDKR_DMA_DL: passes w1 and bits 16..23 of w0 to
+// gSPDMADisplayList().
+void F3DDKR_DMA_DList( u32 w0, u32 w1 )
+{
+    const u32 n = _SHIFTR( w0, 16, 8 );
+
+    gSPDMADisplayList( w1, n );
+}
+
+// Handler registered for F3DDKR_DMA_OFFSETS: the low 24 bits of w0 and of w1
+// are handed to gSPSetDMAOffsets().
+void F3DDKR_DMA_Offsets( u32 w0, u32 w1 )
+{
+    const u32 first  = _SHIFTR( w0, 0, 24 );
+    const u32 second = _SHIFTR( w1, 0, 24 );
+
+    gSPSetDMAOffsets( first, second );
+}
+
+void F3DDKR_MoveWord( u32 w0, u32 w1 )
+{
+    switch (_SHIFTR( w0, 0, 8 ))
+    {
+        case 0x02:
+            gSP.matrix.billboard = w1 & 1;
+            break;
+        case 0x0A:
+            gSP.matrix.modelViewi = _SHIFTR( w1, 6, 2 );
+            gSP.changed |= CHANGED_MATRIX;
+            break;
+        default:
+            F3D_MoveWord( w0, w1 );
+            break;
+    }
+}
+
+// Sets up the GBI state for the F3DDKR microcode: F3D geometry-mode flags,
+// a PC stack size of 10, the command table (F3D handlers plus the DMA and
+// move-word handlers from this file) and zeroed DMA offsets.
+void F3DDKR_Init()
+{
+    // Set GeometryMode flags
+    GBI_InitFlags( F3D );
+
+    GBI.PCStackSize = 10;
+
+    //          GBI Command             Command Value           Command Function
+    GBI_SetGBI( G_SPNOOP,               F3D_SPNOOP,             F3D_SPNoOp );
+    GBI_SetGBI( G_DMA_MTX,              F3DDKR_DMA_MTX,         F3DDKR_DMA_Mtx );
+    GBI_SetGBI( G_MOVEMEM,              F3D_MOVEMEM,            F3D_MoveMem );
+    GBI_SetGBI( G_DMA_VTX,              F3DDKR_DMA_VTX,         F3DDKR_DMA_Vtx );
+    GBI_SetGBI( G_DL,                   F3D_DL,                 F3D_DList );
+    GBI_SetGBI( G_DMA_DL,               F3DDKR_DMA_DL,          F3DDKR_DMA_DList );
+    GBI_SetGBI( G_DMA_TRI,              F3DDKR_DMA_TRI,         F3DDKR_DMA_Tri );
+
+    // F3DDKR_DMA_OFFSETS uses 0xBF, the value F3D assigns to TRI1
+    GBI_SetGBI( G_DMA_OFFSETS,          F3DDKR_DMA_OFFSETS,     F3DDKR_DMA_Offsets );
+    GBI_SetGBI( G_CULLDL,               F3D_CULLDL,             F3D_CullDL );
+    GBI_SetGBI( G_MOVEWORD,             F3D_MOVEWORD,           F3DDKR_MoveWord );
+    GBI_SetGBI( G_TEXTURE,              F3D_TEXTURE,            F3D_Texture );
+    GBI_SetGBI( G_SETOTHERMODE_H,       F3D_SETOTHERMODE_H,     F3D_SetOtherMode_H );
+    GBI_SetGBI( G_SETOTHERMODE_L,       F3D_SETOTHERMODE_L,     F3D_SetOtherMode_L );
+    GBI_SetGBI( G_ENDDL,                F3D_ENDDL,              F3D_EndDL );
+    GBI_SetGBI( G_SETGEOMETRYMODE,      F3D_SETGEOMETRYMODE,    F3D_SetGeometryMode );
+    GBI_SetGBI( G_CLEARGEOMETRYMODE,    F3D_CLEARGEOMETRYMODE,  F3D_ClearGeometryMode );
+    GBI_SetGBI( G_QUAD,                 F3D_QUAD,               F3D_Quad );
+    GBI_SetGBI( G_RDPHALF_1,            F3D_RDPHALF_1,          F3D_RDPHalf_1 );
+    GBI_SetGBI( G_RDPHALF_2,            F3D_RDPHALF_2,          F3D_RDPHalf_2 );
+    GBI_SetGBI( G_RDPHALF_CONT,         F3D_RDPHALF_CONT,       F3D_RDPHalf_Cont );
+    GBI_SetGBI( G_TRI4,                 F3D_TRI4,               F3D_Tri4 );
+
+    // start with both DMA offsets at zero
+    gSPSetDMAOffsets( 0, 0 );
+}
+
diff --git a/source/gles2n64/src/F3DDKR.h b/source/gles2n64/src/F3DDKR.h
new file mode 100644 (file)
index 0000000..d70ea46
--- /dev/null
@@ -0,0 +1,14 @@
+#ifndef F3DDKR_H
+#define F3DDKR_H
+
+#define F3DDKR_VTX_APPEND       0x00010000
+
+#define F3DDKR_DMA_MTX          0x01
+#define F3DDKR_DMA_VTX          0x04
+#define F3DDKR_DMA_TRI          0x05
+#define F3DDKR_DMA_DL           0x07
+#define F3DDKR_DMA_OFFSETS      0xBF
+
+void F3DDKR_Init();
+#endif
+
diff --git a/source/gles2n64/src/F3DEX.cpp b/source/gles2n64/src/F3DEX.cpp
new file mode 100644 (file)
index 0000000..53fcd63
--- /dev/null
@@ -0,0 +1,90 @@
+#include "gles2N64.h"
+#include "Debug.h"
+#include "F3D.h"
+#include "F3DEX.h"
+#include "N64.h"
+#include "RSP.h"
+#include "RDP.h"
+#include "gSP.h"
+#include "gDP.h"
+#include "GBI.h"
+
+void F3DEX_Vtx( u32 w0, u32 w1 ) // F3DEX handler bound to G_VTX by F3DEX_Init
+{
+    gSPVertex( w1, _SHIFTR( w0, 10, 6 ), _SHIFTR( w0, 17, 7 ) ); // w1 is forwarded untouched; the other two arguments are derived from w0 via _SHIFTR
+}
+
+void F3DEX_Tri1( u32 w0, u32 w1 ) // F3DEX handler bound to G_TRI1 by F3DEX_Init; w0 is not read
+{
+    gSP1Triangle( _SHIFTR( w1, 17, 7 ), _SHIFTR( w1, 9, 7 ), _SHIFTR( w1, 1, 7 )); // three values taken from w1 (presumably the vertex indices - confirm against gSP.h)
+}
+
+void F3DEX_CullDL( u32 w0, u32 w1 ) // Bound to G_CULLDL by both F3DEX_Init and F3DEX2_Init
+{
+    gSPCullDisplayList( _SHIFTR( w0, 1, 15 ), _SHIFTR( w1, 1, 15 ) ); // one value from each command word, extracted with the same _SHIFTR arguments
+}
+
+void F3DEX_ModifyVtx( u32 w0, u32 w1 ) // Bound to G_MODIFYVTX by both F3DEX_Init and F3DEX2_Init
+{
+    gSPModifyVertex( _SHIFTR( w0, 1, 15 ), _SHIFTR( w0, 16, 8 ), w1 ); // two values derived from w0, then w1 passed through unchanged
+}
+
+void F3DEX_Tri2( u32 w0, u32 w1 ) // Bound to G_TRI2 by both F3DEX_Init and F3DEX2_Init
+{
+    gSP2Triangles( _SHIFTR( w0, 17, 7 ), _SHIFTR( w0, 9, 7 ), _SHIFTR( w0, 1, 7 ), 0, // first group of values comes from w0, with a literal 0 as the fourth argument
+                  _SHIFTR( w1, 17, 7 ), _SHIFTR( w1, 9, 7 ), _SHIFTR( w1, 1, 7 ), 0); // second group comes from w1 with the same layout
+}
+
+void F3DEX_Quad( u32 w0, u32 w1 ) // F3DEX handler bound to G_QUAD by F3DEX_Init; w0 is not read
+{
+    gSP1Quadrangle( _SHIFTR( w1, 25, 7 ), _SHIFTR( w1, 17, 7 ), _SHIFTR( w1, 9, 7 ), _SHIFTR( w1, 1, 7 ) ); // four values, all taken from w1
+}
+
+void F3DEX_Branch_Z( u32 w0, u32 w1 ) // Bound to G_BRANCH_Z by both F3DEX_Init and F3DEX2_Init
+{
+    gSPBranchLessZ( gDP.half_1, _SHIFTR( w0, 1, 11 ), (s32)w1 ); // reads gDP.half_1 (presumably stored by an earlier RDPHALF_1 command - confirm); w1 is reinterpreted as signed
+}
+
+void F3DEX_Load_uCode( u32 w0, u32 w1 ) // Bound to G_LOAD_UCODE by both F3DEX_Init and F3DEX2_Init
+{
+    gSPLoadUcodeEx( w1, gDP.half_1, _SHIFTR( w0, 0, 16 ) + 1 ); // third argument is the value taken from w0 plus one; gDP.half_1 is read as-is
+}
+
+void F3DEX_Init() // Installs the F3DEX command table through a sequence of GBI_SetGBI calls
+{
+    // Set GeometryMode flags
+    GBI_InitFlags( F3DEX );
+
+    GBI.PCStackSize = 18; // the F3DPD and F3DWRUS tables in this commit use 10 here
+
+    //          GBI Command             Command Value           Command Function
+    GBI_SetGBI( G_SPNOOP,               F3D_SPNOOP,             F3D_SPNoOp );
+    GBI_SetGBI( G_MTX,                  F3D_MTX,                F3D_Mtx );
+    GBI_SetGBI( G_RESERVED0,            F3D_RESERVED0,          F3D_Reserved0 );
+    GBI_SetGBI( G_MOVEMEM,              F3D_MOVEMEM,            F3D_MoveMem );
+    GBI_SetGBI( G_VTX,                  F3D_VTX,                F3DEX_Vtx );
+    GBI_SetGBI( G_RESERVED1,            F3D_RESERVED1,          F3D_Reserved1 );
+    GBI_SetGBI( G_DL,                   F3D_DL,                 F3D_DList );
+    GBI_SetGBI( G_RESERVED2,            F3D_RESERVED2,          F3D_Reserved2 );
+    GBI_SetGBI( G_RESERVED3,            F3D_RESERVED3,          F3D_Reserved3 );
+    GBI_SetGBI( G_SPRITE2D_BASE,        F3D_SPRITE2D_BASE,      F3D_Sprite2D_Base );
+
+    GBI_SetGBI( G_TRI1,                 F3D_TRI1,               F3DEX_Tri1 );
+    GBI_SetGBI( G_CULLDL,               F3D_CULLDL,             F3DEX_CullDL );
+    GBI_SetGBI( G_POPMTX,               F3D_POPMTX,             F3D_PopMtx );
+    GBI_SetGBI( G_MOVEWORD,             F3D_MOVEWORD,           F3D_MoveWord );
+    GBI_SetGBI( G_TEXTURE,              F3D_TEXTURE,            F3D_Texture );
+    GBI_SetGBI( G_SETOTHERMODE_H,       F3D_SETOTHERMODE_H,     F3D_SetOtherMode_H );
+    GBI_SetGBI( G_SETOTHERMODE_L,       F3D_SETOTHERMODE_L,     F3D_SetOtherMode_L );
+    GBI_SetGBI( G_ENDDL,                F3D_ENDDL,              F3D_EndDL );
+    GBI_SetGBI( G_SETGEOMETRYMODE,      F3D_SETGEOMETRYMODE,    F3D_SetGeometryMode );
+    GBI_SetGBI( G_CLEARGEOMETRYMODE,    F3D_CLEARGEOMETRYMODE,  F3D_ClearGeometryMode );
+    GBI_SetGBI( G_QUAD,                 F3D_QUAD,               F3DEX_Quad );
+    GBI_SetGBI( G_RDPHALF_1,            F3D_RDPHALF_1,          F3D_RDPHalf_1 );
+    GBI_SetGBI( G_RDPHALF_2,            F3D_RDPHALF_2,          F3D_RDPHalf_2 );
+    GBI_SetGBI( G_MODIFYVTX,            F3DEX_MODIFYVTX,        F3DEX_ModifyVtx ); // the last four entries use the F3DEX_* opcode constants from F3DEX.h
+    GBI_SetGBI( G_TRI2,                 F3DEX_TRI2,             F3DEX_Tri2 );
+    GBI_SetGBI( G_BRANCH_Z,             F3DEX_BRANCH_Z,         F3DEX_Branch_Z );
+    GBI_SetGBI( G_LOAD_UCODE,           F3DEX_LOAD_UCODE,       F3DEX_Load_uCode );
+}
+
diff --git a/source/gles2n64/src/F3DEX.h b/source/gles2n64/src/F3DEX.h
new file mode 100644 (file)
index 0000000..44d38f0
--- /dev/null
@@ -0,0 +1,54 @@
+#ifndef F3DEX_H
+#define F3DEX_H
+
+#define F3DEX_MTX_STACKSIZE     18
+
+#define F3DEX_MTX_MODELVIEW     0x00
+#define F3DEX_MTX_PROJECTION    0x01
+#define F3DEX_MTX_MUL           0x00
+#define F3DEX_MTX_LOAD          0x02
+#define F3DEX_MTX_NOPUSH        0x00
+#define F3DEX_MTX_PUSH          0x04
+
+#define F3DEX_TEXTURE_ENABLE    0x00000002
+#define F3DEX_SHADING_SMOOTH    0x00000200
+#define F3DEX_CULL_FRONT        0x00001000
+#define F3DEX_CULL_BACK         0x00002000
+#define F3DEX_CULL_BOTH         0x00003000
+#define F3DEX_CLIPPING          0x00800000
+
+#define F3DEX_MV_VIEWPORT       0x80
+
+#define F3DEX_MWO_aLIGHT_1      0x00
+#define F3DEX_MWO_bLIGHT_1      0x04
+#define F3DEX_MWO_aLIGHT_2      0x20
+#define F3DEX_MWO_bLIGHT_2      0x24
+#define F3DEX_MWO_aLIGHT_3      0x40
+#define F3DEX_MWO_bLIGHT_3      0x44
+#define F3DEX_MWO_aLIGHT_4      0x60
+#define F3DEX_MWO_bLIGHT_4      0x64
+#define F3DEX_MWO_aLIGHT_5      0x80
+#define F3DEX_MWO_bLIGHT_5      0x84
+#define F3DEX_MWO_aLIGHT_6      0xa0
+#define F3DEX_MWO_bLIGHT_6      0xa4
+#define F3DEX_MWO_aLIGHT_7      0xc0
+#define F3DEX_MWO_bLIGHT_7      0xc4
+#define F3DEX_MWO_aLIGHT_8      0xe0
+#define F3DEX_MWO_bLIGHT_8      0xe4
+
+// F3DEX commands
+#define F3DEX_MODIFYVTX             0xB2
+#define F3DEX_TRI2                  0xB1
+#define F3DEX_BRANCH_Z              0xB0
+#define F3DEX_LOAD_UCODE            0xAF // 0xCF
+
+void F3DEX_Vtx( u32 w0, u32 w1 );
+void F3DEX_Tri1( u32 w0, u32 w1 );
+void F3DEX_CullDL( u32 w0, u32 w1 );
+void F3DEX_ModifyVtx( u32 w0, u32 w1 );
+void F3DEX_Tri2( u32 w0, u32 w1 );
+void F3DEX_Branch_Z( u32 w0, u32 w1 );
+void F3DEX_Load_uCode( u32 w0, u32 w1 );
+void F3DEX_Init();
+#endif
+
diff --git a/source/gles2n64/src/F3DEX2.cpp b/source/gles2n64/src/F3DEX2.cpp
new file mode 100644 (file)
index 0000000..0d9f3c9
--- /dev/null
@@ -0,0 +1,255 @@
+#include "gles2N64.h"
+#include "Debug.h"
+#include "F3D.h"
+#include "F3DEX.h"
+#include "F3DEX2.h"
+#include "N64.h"
+#include "RSP.h"
+#include "RDP.h"
+#include "gSP.h"
+#include "gDP.h"
+#include "GBI.h"
+#include "OpenGL.h"
+
+#include "Config.h"
+
+void F3DEX2_Mtx( u32 w0, u32 w1 ) // F3DEX2 handler bound to G_MTX by F3DEX2_Init
+{
+    gSPMatrix( w1, _SHIFTR( w0, 0, 8 ) ^ G_MTX_PUSH ); // the value taken from w0 is XORed with G_MTX_PUSH before being passed on, which toggles that flag
+}
+
+void F3DEX2_MoveMem( u32 w0, u32 w1 ) // F3DEX2 handler bound to G_MOVEMEM; dispatches on the value _SHIFTR( w0, 0, 8 )
+{
+#ifdef __TRIBUFFER_OPT
+    gSPFlushTriangles();
+#endif
+    switch (_SHIFTR( w0, 0, 8 ))
+    {
+        case F3DEX2_MV_VIEWPORT:
+            gSPViewport( w1 );
+            break;
+
+        case G_MV_MATRIX:
+            gSPForceMatrix( w1 );
+            RSP.PC[RSP.PCi] += 8;             // force matrix takes two commands, so the current PC entry is advanced by 8
+            break;
+
+        case G_MV_LIGHT: // last label of the switch, so the declaration below is not jumped over
+            u32 offset = _SHIFTR( w0, 8, 8 ) << 3;
+            if (offset >= 48) // offsets below 48 are ignored by this handler
+            {
+                gSPLight( w1, (offset - 24) / 24); // offset 48 maps to 1, 72 to 2, and so on
+            }
+            break;
+    }
+}
+
+void F3DEX2_Vtx( u32 w0, u32 w1 ) // F3DEX2 handler bound to G_VTX by F3DEX2_Init
+{
+    u32 n = _SHIFTR( w0, 12, 8 );
+    // The third argument is the second w0 value minus n; both are unsigned, so it wraps if that value is smaller than n.
+    gSPVertex( w1, n, _SHIFTR( w0, 1, 7 ) - n );
+}
+
+void F3DEX2_Reserved1( u32 w0, u32 w1 ) // Empty handler: both command words are ignored
+{
+}
+
+void F3DEX2_Tri1( u32 w0, u32 w1 ) // F3DEX2 handler bound to G_TRI1; unlike F3DEX_Tri1 it reads w0, and w1 is unused
+{
+    gSP1Triangle( _SHIFTR( w0, 17, 7 ),
+                  _SHIFTR( w0, 9, 7 ),
+                  _SHIFTR( w0, 1, 7 ));
+}
+
+void F3DEX2_PopMtx( u32 w0, u32 w1 ) // F3DEX2 handler bound to G_POPMTX; w0 is not read
+{
+    gSPPopMatrixN( 0, w1 >> 6 ); // second argument is w1 divided by 64 (presumably the byte size of one matrix - confirm)
+}
+
+void F3DEX2_MoveWord( u32 w0, u32 w1 ) // F3DEX2 handler bound to G_MOVEWORD; dispatches on _SHIFTR( w0, 16, 8 )
+{
+    switch (_SHIFTR( w0, 16, 8 )) // no default label: selector values not listed below are silently ignored
+    {
+        case G_MW_FORCEMTX:
+            // Handled in movemem
+            break;
+        case G_MW_MATRIX:
+            gSPInsertMatrix( _SHIFTR( w0, 0, 16 ), w1 );
+            break;
+        case G_MW_NUMLIGHT:
+            gSPNumLights( w1 / 24 ); // w1 is divided by 24 before use
+            break;
+        case G_MW_CLIP:
+            gSPClipRatio( w1 );
+            break;
+        case G_MW_SEGMENT:
+            gSPSegment( _SHIFTR( w0, 0, 16 ) >> 2, w1 & 0x00FFFFFF ); // w0 value divided by 4; only the low 24 bits of w1 are kept
+            break;
+        case G_MW_FOG:
+            gSPFogFactor( (s16)_SHIFTR( w1, 16, 16 ), (s16)_SHIFTR( w1, 0, 16 ) ); // both values from w1 are cast to signed 16-bit
+            break;
+        case G_MW_LIGHTCOL:
+            gSPLightColor((_SHIFTR( w0, 0, 16 ) / 24) + 1, w1 ); // w0 value is divided by 24, then one is added
+            break;
+        case G_MW_PERSPNORM:
+            gSPPerspNormalize( w1 );
+            break;
+    }
+}
+
+void F3DEX2_Texture( u32 w0, u32 w1 ) // F3DEX2 handler bound to G_TEXTURE by F3DEX2_Init
+{
+    gSPTexture( _FIXED2FLOAT( _SHIFTR( w1, 16, 16 ), 16 ), // first two arguments: two values from w1, each converted with _FIXED2FLOAT( ..., 16 )
+                _FIXED2FLOAT( _SHIFTR( w1, 0, 16 ), 16 ),
+                _SHIFTR( w0, 11, 3 ), // remaining three arguments are derived from w0
+                _SHIFTR( w0, 8, 3 ),
+                _SHIFTR( w0, 1, 7 ) );
+}
+
+void F3DEX2_SetOtherMode_H( u32 w0, u32 w1 )
+{
+    switch (32 - _SHIFTR( w0, 8, 8 ) - (_SHIFTR( w0, 0, 8 ) + 1))
+    {
+        case G_MDSFT_PIPELINE:
+            gDPPipelineMode( w1 >> G_MDSFT_PIPELINE );
+            break;
+        case G_MDSFT_CYCLETYPE:
+            gDPSetCycleType( w1 >> G_MDSFT_CYCLETYPE );
+            break;
+        case G_MDSFT_TEXTPERSP:
+            gDPSetTexturePersp( w1 >> G_MDSFT_TEXTPERSP );
+            break;
+        case G_MDSFT_TEXTDETAIL:
+            gDPSetTextureDetail( w1 >> G_MDSFT_TEXTDETAIL );
+            break;
+        case G_MDSFT_TEXTLOD:
+            gDPSetTextureLOD( w1 >> G_MDSFT_TEXTLOD );
+            break;
+        case G_MDSFT_TEXTLUT:
+            gDPSetTextureLUT( w1 >> G_MDSFT_TEXTLUT );
+            break;
+        case G_MDSFT_TEXTFILT:
+            gDPSetTextureFilter( w1 >> G_MDSFT_TEXTFILT );
+            break;
+        case G_MDSFT_TEXTCONV:
+            gDPSetTextureConvert( w1 >> G_MDSFT_TEXTCONV );
+            break;
+        case G_MDSFT_COMBKEY:
+            gDPSetCombineKey( w1 >> G_MDSFT_COMBKEY );
+            break;
+        case G_MDSFT_RGBDITHER:
+            gDPSetColorDither( w1 >> G_MDSFT_RGBDITHER );
+            break;
+        case G_MDSFT_ALPHADITHER:
+            gDPSetAlphaDither( w1 >> G_MDSFT_ALPHADITHER );
+            break;
+        default:
+            u32 length = _SHIFTR( w0, 0, 8 ) + 1;
+            u32 shift = 32 - _SHIFTR( w0, 8, 8 ) - length;
+            u32 mask = ((1 << length) - 1) << shift;
+
+            gDP.otherMode.h &= ~mask;
+            gDP.otherMode.h |= w1 & mask;
+
+            gDP.changed |= CHANGED_CYCLETYPE;
+            break;
+    }
+}
+
+void F3DEX2_SetOtherMode_L( u32 w0, u32 w1 )
+{
+    switch (32 - _SHIFTR( w0, 8, 8 ) - (_SHIFTR( w0, 0, 8 ) + 1))
+    {
+        case G_MDSFT_ALPHACOMPARE:
+            gDPSetAlphaCompare( w1 >> G_MDSFT_ALPHACOMPARE );
+            break;
+        case G_MDSFT_ZSRCSEL:
+            gDPSetDepthSource( w1 >> G_MDSFT_ZSRCSEL );
+            break;
+        case G_MDSFT_RENDERMODE:
+            gDPSetRenderMode( w1 & 0xCCCCFFFF, w1 & 0x3333FFFF );
+            break;
+        default:
+            u32 length = _SHIFTR( w0, 0, 8 ) + 1;
+            u32 shift = 32 - _SHIFTR( w0, 8, 8 ) - length;
+            u32 mask = ((1 << length) - 1) << shift;
+
+            gDP.otherMode.l &= ~mask;
+            gDP.otherMode.l |= w1 & mask;
+
+            gDP.changed |= CHANGED_RENDERMODE | CHANGED_ALPHACOMPARE;
+            break;
+    }
+}
+
+void F3DEX2_GeometryMode( u32 w0, u32 w1 ) // F3DEX2 handler bound to G_GEOMETRYMODE by F3DEX2_Init
+{
+    gSPGeometryMode( ~_SHIFTR( w0, 0, 24 ), w1 ); // first argument is the bitwise complement of the value taken from w0; w1 is passed through
+}
+
+void F3DEX2_DMAIO( u32 w0, u32 w1 ) // Empty handler bound to G_DMA_IO: the command is accepted and ignored
+{
+}
+
+void F3DEX2_Special_1( u32 w0, u32 w1 ) // Empty handler bound to G_SPECIAL_1: the command is accepted and ignored
+{
+}
+
+void F3DEX2_Special_2( u32 w0, u32 w1 ) // Empty handler bound to G_SPECIAL_2: the command is accepted and ignored
+{
+}
+
+void F3DEX2_Special_3( u32 w0, u32 w1 ) // Empty handler bound to G_SPECIAL_3: the command is accepted and ignored
+{
+}
+
+void F3DEX2_Quad( u32 w0, u32 w1 ) // Bound to G_QUAD by F3DEX2_Init; despite the name it issues gSP2Triangles with the same arguments as F3DEX_Tri2
+{
+    gSP2Triangles( _SHIFTR( w0, 17, 7 ), // first group of values comes from w0
+                   _SHIFTR( w0, 9, 7 ),
+                   _SHIFTR( w0, 1, 7 ),
+                   0,
+                   _SHIFTR( w1, 17, 7 ), // second group comes from w1
+                   _SHIFTR( w1, 9, 7 ),
+                   _SHIFTR( w1, 1, 7 ),
+                   0 );
+}
+
+void F3DEX2_Init() // Installs the F3DEX2 command table through a sequence of GBI_SetGBI calls
+{
+    // Set GeometryMode flags
+    GBI_InitFlags( F3DEX2 );
+
+    GBI.PCStackSize = 18; // same value that F3DEX_Init uses
+
+    // GBI Command                      Command Value               Command Function
+    GBI_SetGBI( G_RDPHALF_2,            F3DEX2_RDPHALF_2,           F3D_RDPHalf_2 );
+    GBI_SetGBI( G_SETOTHERMODE_H,       F3DEX2_SETOTHERMODE_H,      F3DEX2_SetOtherMode_H );
+    GBI_SetGBI( G_SETOTHERMODE_L,       F3DEX2_SETOTHERMODE_L,      F3DEX2_SetOtherMode_L );
+    GBI_SetGBI( G_RDPHALF_1,            F3DEX2_RDPHALF_1,           F3D_RDPHalf_1 );
+    GBI_SetGBI( G_SPNOOP,               F3DEX2_SPNOOP,              F3D_SPNoOp );
+    GBI_SetGBI( G_ENDDL,                F3DEX2_ENDDL,               F3D_EndDL );
+    GBI_SetGBI( G_DL,                   F3DEX2_DL,                  F3D_DList );
+    GBI_SetGBI( G_LOAD_UCODE,           F3DEX2_LOAD_UCODE,          F3DEX_Load_uCode );
+    GBI_SetGBI( G_MOVEMEM,              F3DEX2_MOVEMEM,             F3DEX2_MoveMem );
+    GBI_SetGBI( G_MOVEWORD,             F3DEX2_MOVEWORD,            F3DEX2_MoveWord );
+    GBI_SetGBI( G_MTX,                  F3DEX2_MTX,                 F3DEX2_Mtx );
+    GBI_SetGBI( G_GEOMETRYMODE,         F3DEX2_GEOMETRYMODE,        F3DEX2_GeometryMode );
+    GBI_SetGBI( G_POPMTX,               F3DEX2_POPMTX,              F3DEX2_PopMtx );
+    GBI_SetGBI( G_TEXTURE,              F3DEX2_TEXTURE,             F3DEX2_Texture );
+    GBI_SetGBI( G_DMA_IO,               F3DEX2_DMA_IO,              F3DEX2_DMAIO );
+    GBI_SetGBI( G_SPECIAL_1,            F3DEX2_SPECIAL_1,           F3DEX2_Special_1 );
+    GBI_SetGBI( G_SPECIAL_2,            F3DEX2_SPECIAL_2,           F3DEX2_Special_2 );
+    GBI_SetGBI( G_SPECIAL_3,            F3DEX2_SPECIAL_3,           F3DEX2_Special_3 );
+
+    GBI_SetGBI( G_VTX,                  F3DEX2_VTX,                 F3DEX2_Vtx );
+    GBI_SetGBI( G_MODIFYVTX,            F3DEX2_MODIFYVTX,           F3DEX_ModifyVtx ); // several entries reuse the F3DEX_* handlers with F3DEX2 opcode values
+    GBI_SetGBI( G_CULLDL,               F3DEX2_CULLDL,              F3DEX_CullDL );
+    GBI_SetGBI( G_BRANCH_Z,             F3DEX2_BRANCH_Z,            F3DEX_Branch_Z );
+    GBI_SetGBI( G_TRI1,                 F3DEX2_TRI1,                F3DEX2_Tri1 );
+    GBI_SetGBI( G_TRI2,                 F3DEX2_TRI2,                F3DEX_Tri2 );
+    GBI_SetGBI( G_QUAD,                 F3DEX2_QUAD,                F3DEX2_Quad );
+//  GBI_SetGBI( G_LINE3D,               F3DEX2_LINE3D,              F3DEX2_Line3D );
+}
+
diff --git a/source/gles2n64/src/F3DEX2.h b/source/gles2n64/src/F3DEX2.h
new file mode 100644 (file)
index 0000000..b598e12
--- /dev/null
@@ -0,0 +1,88 @@
+#ifndef F3DEX2_H
+#define F3DEX2_H
+
+#define F3DEX2_MTX_STACKSIZE        18
+
+#define F3DEX2_MTX_MODELVIEW        0x00
+#define F3DEX2_MTX_PROJECTION       0x04
+#define F3DEX2_MTX_MUL              0x00
+#define F3DEX2_MTX_LOAD             0x02
+#define F3DEX2_MTX_NOPUSH           0x00
+#define F3DEX2_MTX_PUSH             0x01
+
+#define F3DEX2_TEXTURE_ENABLE       0x00000000
+#define F3DEX2_SHADING_SMOOTH       0x00200000
+#define F3DEX2_CULL_FRONT           0x00000200
+#define F3DEX2_CULL_BACK            0x00000400
+#define F3DEX2_CULL_BOTH            0x00000600
+#define F3DEX2_CLIPPING             0x00800000
+
+#define F3DEX2_MV_VIEWPORT          8
+
+#define F3DEX2_MWO_aLIGHT_1     0x00
+#define F3DEX2_MWO_bLIGHT_1     0x04
+#define F3DEX2_MWO_aLIGHT_2     0x18
+#define F3DEX2_MWO_bLIGHT_2     0x1c
+#define F3DEX2_MWO_aLIGHT_3     0x30
+#define F3DEX2_MWO_bLIGHT_3     0x34
+#define F3DEX2_MWO_aLIGHT_4     0x48
+#define F3DEX2_MWO_bLIGHT_4     0x4c
+#define F3DEX2_MWO_aLIGHT_5     0x60
+#define F3DEX2_MWO_bLIGHT_5     0x64
+#define F3DEX2_MWO_aLIGHT_6     0x78
+#define F3DEX2_MWO_bLIGHT_6     0x7c
+#define F3DEX2_MWO_aLIGHT_7     0x90
+#define F3DEX2_MWO_bLIGHT_7     0x94
+#define F3DEX2_MWO_aLIGHT_8     0xa8
+#define F3DEX2_MWO_bLIGHT_8     0xac
+
+
+#define F3DEX2_RDPHALF_2        0xF1
+#define F3DEX2_SETOTHERMODE_H   0xE3
+#define F3DEX2_SETOTHERMODE_L   0xE2
+#define F3DEX2_RDPHALF_1        0xE1
+#define F3DEX2_SPNOOP           0xE0
+#define F3DEX2_ENDDL            0xDF
+#define F3DEX2_DL               0xDE
+#define F3DEX2_LOAD_UCODE       0xDD
+#define F3DEX2_MOVEMEM          0xDC
+#define F3DEX2_MOVEWORD         0xDB
+#define F3DEX2_MTX              0xDA
+#define F3DEX2_GEOMETRYMODE     0xD9
+#define F3DEX2_POPMTX           0xD8
+#define F3DEX2_TEXTURE          0xD7
+#define F3DEX2_DMA_IO           0xD6
+#define F3DEX2_SPECIAL_1        0xD5
+#define F3DEX2_SPECIAL_2        0xD4
+#define F3DEX2_SPECIAL_3        0xD3
+
+#define F3DEX2_VTX              0x01
+#define F3DEX2_MODIFYVTX        0x02
+#define F3DEX2_CULLDL           0x03
+#define F3DEX2_BRANCH_Z         0x04
+#define F3DEX2_TRI1             0x05
+#define F3DEX2_TRI2             0x06
+#define F3DEX2_QUAD             0x07
+//#define F3DEX2_LINE3D         0x08
+
+
+void F3DEX2_Mtx( u32 w0, u32 w1 );
+void F3DEX2_MoveMem( u32 w0, u32 w1 );
+void F3DEX2_Vtx( u32 w0, u32 w1 );
+void F3DEX2_Reserved1( u32 w0, u32 w1 );
+void F3DEX2_Tri1( u32 w0, u32 w1 );
+void F3DEX2_PopMtx( u32 w0, u32 w1 );
+void F3DEX2_MoveWord( u32 w0, u32 w1 );
+void F3DEX2_Texture( u32 w0, u32 w1 );
+void F3DEX2_SetOtherMode_H( u32 w0, u32 w1 );
+void F3DEX2_SetOtherMode_L( u32 w0, u32 w1 );
+void F3DEX2_GeometryMode( u32 w0, u32 w1 );
+//void F3DEX2_Line3D( u32 w0, u32 w1 );
+void F3DEX2_DMAIO( u32 w0, u32 w1 );
+void F3DEX2_Special_1( u32 w0, u32 w1 );
+void F3DEX2_Special_2( u32 w0, u32 w1 );
+void F3DEX2_Special_3( u32 w0, u32 w1 );
+void F3DEX2_Quad( u32 w0, u32 w1 );
+void F3DEX2_Init();
+#endif
+
diff --git a/source/gles2n64/src/F3DPD.cpp b/source/gles2n64/src/F3DPD.cpp
new file mode 100644 (file)
index 0000000..fe521c4
--- /dev/null
@@ -0,0 +1,59 @@
+#include "gles2N64.h"
+#include "Debug.h"
+#include "F3D.h"
+#include "F3DPD.h"
+#include "N64.h"
+#include "RSP.h"
+#include "RDP.h"
+#include "gSP.h"
+#include "gDP.h"
+#include "GBI.h"
+
+void F3DPD_Vtx( u32 w0, u32 w1 ) // F3DPD handler bound to G_VTX by F3DPD_Init; uses gSPCIVertex rather than gSPVertex
+{
+    gSPCIVertex( w1, _SHIFTR( w0, 20, 4 ) + 1, _SHIFTR( w0, 16, 4 ) ); // second argument is the value taken from w0 plus one
+}
+
+void F3DPD_VtxColorBase( u32 w0, u32 w1 ) // F3DPD handler bound to G_VTXCOLORBASE by F3DPD_Init; w0 is not read
+{
+    gSPSetVertexColorBase( w1 ); // w1 is forwarded unchanged
+}
+
+void F3DPD_Init() // Installs the F3DPD command table: F3D opcodes and handlers except for G_VTX and G_VTXCOLORBASE
+{
+    // Set GeometryMode flags
+    GBI_InitFlags( F3D );
+
+    GBI.PCStackSize = 10;
+
+    //          GBI Command             Command Value           Command Function
+    GBI_SetGBI( G_SPNOOP,               F3D_SPNOOP,             F3D_SPNoOp );
+    GBI_SetGBI( G_MTX,                  F3D_MTX,                F3D_Mtx );
+    GBI_SetGBI( G_RESERVED0,            F3D_RESERVED0,          F3D_Reserved0 );
+    GBI_SetGBI( G_MOVEMEM,              F3D_MOVEMEM,            F3D_MoveMem );
+    GBI_SetGBI( G_VTX,                  F3D_VTX,                F3DPD_Vtx );
+    GBI_SetGBI( G_RESERVED1,            F3D_RESERVED1,          F3D_Reserved1 );
+    GBI_SetGBI( G_DL,                   F3D_DL,                 F3D_DList );
+    GBI_SetGBI( G_VTXCOLORBASE,         F3DPD_VTXCOLORBASE,     F3DPD_VtxColorBase ); // sits where the F3DEX and F3DWRUS tables register G_RESERVED2
+    GBI_SetGBI( G_RESERVED3,            F3D_RESERVED3,          F3D_Reserved3 );
+    GBI_SetGBI( G_SPRITE2D_BASE,        F3D_SPRITE2D_BASE,      F3D_Sprite2D_Base );
+
+    GBI_SetGBI( G_TRI1,                 F3D_TRI1,               F3D_Tri1 );
+    GBI_SetGBI( G_CULLDL,               F3D_CULLDL,             F3D_CullDL );
+    GBI_SetGBI( G_POPMTX,               F3D_POPMTX,             F3D_PopMtx );
+    GBI_SetGBI( G_MOVEWORD,             F3D_MOVEWORD,           F3D_MoveWord );
+    GBI_SetGBI( G_TEXTURE,              F3D_TEXTURE,            F3D_Texture );
+    GBI_SetGBI( G_SETOTHERMODE_H,       F3D_SETOTHERMODE_H,     F3D_SetOtherMode_H );
+    GBI_SetGBI( G_SETOTHERMODE_L,       F3D_SETOTHERMODE_L,     F3D_SetOtherMode_L );
+    GBI_SetGBI( G_ENDDL,                F3D_ENDDL,              F3D_EndDL );
+    GBI_SetGBI( G_SETGEOMETRYMODE,      F3D_SETGEOMETRYMODE,    F3D_SetGeometryMode );
+    GBI_SetGBI( G_CLEARGEOMETRYMODE,    F3D_CLEARGEOMETRYMODE,  F3D_ClearGeometryMode );
+    GBI_SetGBI( G_QUAD,                 F3D_QUAD,               F3D_Quad );
+    GBI_SetGBI( G_RDPHALF_1,            F3D_RDPHALF_1,          F3D_RDPHalf_1 );
+    GBI_SetGBI( G_RDPHALF_2,            F3D_RDPHALF_2,          F3D_RDPHalf_2 );
+    GBI_SetGBI( G_RDPHALF_CONT,         F3D_RDPHALF_CONT,       F3D_RDPHalf_Cont );
+    GBI_SetGBI( G_TRI4,                 F3D_TRI4,               F3D_Tri4 );
+
+    gSPSetDMAOffsets( 0, 0 ); // both arguments are zero
+}
+
diff --git a/source/gles2n64/src/F3DPD.h b/source/gles2n64/src/F3DPD.h
new file mode 100644 (file)
index 0000000..5d7c6e0
--- /dev/null
@@ -0,0 +1,8 @@
+#ifndef F3DPD_H
+#define F3DPD_H
+
+#define F3DPD_VTXCOLORBASE      0x07
+
+void F3DPD_Init();
+#endif
+
diff --git a/source/gles2n64/src/F3DWRUS.cpp b/source/gles2n64/src/F3DWRUS.cpp
new file mode 100644 (file)
index 0000000..8b1ff28
--- /dev/null
@@ -0,0 +1,73 @@
+#include "gles2N64.h"
+#include "Debug.h"
+#include "F3D.h"
+#include "F3DEX.h"
+#include "F3DWRUS.h"
+#include "N64.h"
+#include "RSP.h"
+#include "RDP.h"
+#include "gSP.h"
+#include "gDP.h"
+#include "GBI.h"
+
+
+void F3DWRUS_Vtx( u32 w0, u32 w1 ) // F3DWRUS handler bound to G_VTX by F3DWRUS_Init
+{
+    gSPVertex( w1, _SHIFTR( w0, 9, 7 ), _SHIFTR( w0, 16, 8 ) / 5 ); // the last value is divided by 5 (presumably this microcode stores indices pre-multiplied by 5 - confirm)
+}
+
+void F3DWRUS_Tri1( u32 w0, u32 w1 ) // F3DWRUS handler bound to G_TRI1; w0 is not read, and every value from w1 is divided by 5
+{
+    gSP1Triangle( _SHIFTR( w1, 16, 8 ) / 5,
+                  _SHIFTR( w1, 8, 8 ) / 5,
+                  _SHIFTR( w1, 0, 8 ) / 5);
+}
+
+void F3DWRUS_Tri2( u32 w0, u32 w1 ) // F3DWRUS handler bound to G_TRI2; every extracted value is divided by 5
+{
+    gSP2Triangles( _SHIFTR( w0, 16, 8 ) / 5, _SHIFTR( w0, 8, 8 ) / 5, _SHIFTR( w0, 0, 8 ) / 5, 0, // first group of values comes from w0
+                   _SHIFTR( w1, 16, 8 ) / 5, _SHIFTR( w1, 8, 8 ) / 5, _SHIFTR( w1, 0, 8 ) / 5, 0); // second group comes from w1
+}
+
+void F3DWRUS_Quad( u32 w0, u32 w1 ) // F3DWRUS handler bound to G_QUAD; w0 is not read
+{
+    gSP1Quadrangle( _SHIFTR( w1, 24, 8 ) / 5, _SHIFTR( w1, 16, 8 ) / 5, _SHIFTR( w1, 8, 8 ) / 5, _SHIFTR( w1, 0, 8 ) / 5 ); // four values from w1, each divided by 5
+}
+
+
+void F3DWRUS_Init() // Installs the F3DWRUS command table: F3D opcodes with F3DWRUS handlers for G_VTX, G_TRI1, G_QUAD and G_TRI2
+{
+    // Set GeometryMode flags
+    GBI_InitFlags( F3D );
+
+    GBI.PCStackSize = 10;
+
+    //          GBI Command             Command Value           Command Function
+    GBI_SetGBI( G_SPNOOP,               F3D_SPNOOP,             F3D_SPNoOp );
+    GBI_SetGBI( G_MTX,                  F3D_MTX,                F3D_Mtx );
+    GBI_SetGBI( G_RESERVED0,            F3D_RESERVED0,          F3D_Reserved0 );
+    GBI_SetGBI( G_MOVEMEM,              F3D_MOVEMEM,            F3D_MoveMem );
+    GBI_SetGBI( G_VTX,                  F3D_VTX,                F3DWRUS_Vtx );
+    GBI_SetGBI( G_RESERVED1,            F3D_RESERVED1,          F3D_Reserved1 );
+    GBI_SetGBI( G_DL,                   F3D_DL,                 F3D_DList );
+    GBI_SetGBI( G_RESERVED2,            F3D_RESERVED2,          F3D_Reserved2 );
+    GBI_SetGBI( G_RESERVED3,            F3D_RESERVED3,          F3D_Reserved3 );
+    GBI_SetGBI( G_SPRITE2D_BASE,        F3D_SPRITE2D_BASE,      F3D_Sprite2D_Base );
+
+    GBI_SetGBI( G_TRI1,                 F3D_TRI1,               F3DWRUS_Tri1 );
+    GBI_SetGBI( G_CULLDL,               F3D_CULLDL,             F3D_CullDL );
+    GBI_SetGBI( G_POPMTX,               F3D_POPMTX,             F3D_PopMtx );
+    GBI_SetGBI( G_MOVEWORD,             F3D_MOVEWORD,           F3D_MoveWord );
+    GBI_SetGBI( G_TEXTURE,              F3D_TEXTURE,            F3D_Texture );
+    GBI_SetGBI( G_SETOTHERMODE_H,       F3D_SETOTHERMODE_H,     F3D_SetOtherMode_H );
+    GBI_SetGBI( G_SETOTHERMODE_L,       F3D_SETOTHERMODE_L,     F3D_SetOtherMode_L );
+    GBI_SetGBI( G_ENDDL,                F3D_ENDDL,              F3D_EndDL );
+    GBI_SetGBI( G_SETGEOMETRYMODE,      F3D_SETGEOMETRYMODE,    F3D_SetGeometryMode );
+    GBI_SetGBI( G_CLEARGEOMETRYMODE,    F3D_CLEARGEOMETRYMODE,  F3D_ClearGeometryMode );
+    GBI_SetGBI( G_QUAD,                 F3D_QUAD,               F3DWRUS_Quad );
+    GBI_SetGBI( G_RDPHALF_1,            F3D_RDPHALF_1,          F3D_RDPHalf_1 );
+    GBI_SetGBI( G_RDPHALF_2,            F3D_RDPHALF_2,          F3D_RDPHalf_2 );
+    GBI_SetGBI( G_RDPHALF_CONT,         F3D_RDPHALF_CONT,       F3D_RDPHalf_Cont );
+    GBI_SetGBI( G_TRI2,                 F3DWRUS_TRI2,           F3DWRUS_Tri2 ); // F3DWRUS_TRI2 is 0xB1 in F3DWRUS.h, the same value as F3DEX_TRI2
+}
+
diff --git a/source/gles2n64/src/F3DWRUS.h b/source/gles2n64/src/F3DWRUS.h
new file mode 100644 (file)
index 0000000..f38de7e
--- /dev/null
@@ -0,0 +1,8 @@
+#ifndef F3DWRUS_H
+#define F3DWRUS_H
+
+#define F3DWRUS_TRI2        0xB1
+void F3DWRUS_Init();
+
+#endif
+
diff --git a/source/gles2n64/src/FrameSkipper.cpp b/source/gles2n64/src/FrameSkipper.cpp
new file mode 100644 (file)
index 0000000..84c3428
--- /dev/null
@@ -0,0 +1,61 @@
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ *   Copyright (C) 2011 yongzh (freeman.yong@gmail.com)                    *
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ *   This program is distributed in the hope that it will be useful,       *
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
+ *   GNU General Public License for more details.                          *
+ *                                                                         *
+ *   You should have received a copy of the GNU General Public License     *
+ *   along with this program; if not, write to the                         *
+ *   Free Software Foundation, Inc.,                                       *
+ *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.          *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+#include "FrameSkipper.h"
+#include "ticks.h"
+
+FrameSkipper::FrameSkipper() // Defaults: AUTO mode, at most 2 skips, 60 FPS target; counters are zeroed so willSkipNext()/update() are safe before start()
+       : skipType(AUTO), maxSkips(2), targetFPS(60),
+         skipCounter(0), initialTicks(0), virtualCount(0) // listed in member declaration order
+{}
+
+void FrameSkipper::start() // Resets the timing state; skipType, maxSkips and targetFPS are left untouched
+{
+       initialTicks = 0; // 0 makes the next update() call take its first-frame branch
+       virtualCount = 0;
+       skipCounter = 0;
+}
+
+void FrameSkipper::update()
+{
+       // for the first frame
+       if (initialTicks == 0) {
+               initialTicks = ticksGetTicks();
+               return;
+       }
+
+       unsigned int elapsed = ticksGetTicks() - initialTicks;
+       unsigned int realCount = elapsed * targetFPS / 1000;
+
+       virtualCount++;
+       if (realCount >= virtualCount) {
+               if (realCount > virtualCount &&
+                               skipType == AUTO && skipCounter < maxSkips) {
+                       skipCounter++;
+               } else {
+                       virtualCount = realCount;
+                       if (skipType == AUTO)
+                               skipCounter = 0;
+               }
+       }
+       if (skipType == MANUAL) {
+               if (++skipCounter > maxSkips)
+                       skipCounter = 0;
+       }
+}
diff --git a/source/gles2n64/src/FrameSkipper.h b/source/gles2n64/src/FrameSkipper.h
new file mode 100644 (file)
index 0000000..2b2ccb4
--- /dev/null
@@ -0,0 +1,55 @@
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ *   Copyright (C) 2011 yongzh (freeman.yong@gmail.com)                    *
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ *   This program is distributed in the hope that it will be useful,       *
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
+ *   GNU General Public License for more details.                          *
+ *                                                                         *
+ *   You should have received a copy of the GNU General Public License     *
+ *   along with this program; if not, write to the                         *
+ *   Free Software Foundation, Inc.,                                       *
+ *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.          *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+#ifndef FRAME_SKIPPER_H
+#define FRAME_SKIPPER_H
+
+class FrameSkipper { // Tracks elapsed ticks against a target FPS and reports whether the next frame should be skipped
+public:
+       enum { AUTO, MANUAL }; // values stored in skipType; update() compares skipType against both
+
+       FrameSkipper();
+
+       void setSkips(int type, int max) { // sets the skip mode and the upper bound applied to skipCounter
+               skipType = type;
+               maxSkips = max;
+       }
+
+       void setTargetFPS(int fps) { // frame rate update() uses to compute the expected frame count
+               targetFPS = fps;
+       }
+
+       bool willSkipNext() { // true while skipCounter is positive
+               return (skipCounter > 0);
+       }
+
+       void start(); // zeroes initialTicks, virtualCount and skipCounter
+       void update(); // call once per frame; recomputes skipCounter
+
+private:
+       int skipType; // AUTO or MANUAL
+       int maxSkips; // upper bound that update() applies to skipCounter
+       int targetFPS;
+       int skipCounter; // value tested by willSkipNext()
+       unsigned int initialTicks; // ticksGetTicks() value captured on the first update(); 0 means not captured yet
+       unsigned int virtualCount; // frames counted by update()
+};
+
+#endif
+
diff --git a/source/gles2n64/src/GBI.cpp b/source/gles2n64/src/GBI.cpp
new file mode 100644 (file)
index 0000000..daa6de7
--- /dev/null
@@ -0,0 +1,975 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include "gles2N64.h"
+#include "GBI.h"
+#include "RDP.h"
+#include "RSP.h"
+#include "F3D.h"
+#include "F3DEX.h"
+#include "F3DEX2.h"
+#include "L3D.h"
+#include "L3DEX.h"
+#include "L3DEX2.h"
+#include "S2DEX.h"
+#include "S2DEX2.h"
+#include "F3DDKR.h"
+#include "F3DWRUS.h"
+#include "F3DPD.h"
+#include "F3DCBFD.h"
+#include "Types.h"
+# include <string.h>
+# include <unistd.h>
+# include <stdlib.h>
+# include "convert.h"
+#include "Common.h"
+#include "ticks.h"
+
+#include "CRC.h"
+#include "Debug.h"
+
+// Identification data for the microcode most recently examined by
+// GBI_DetectMicrocode(): uc_crc is the CRC it computes and uc_str the version
+// string it extracts ("Not Found" when there is none). uc_dcrc is only read
+// (by the optional dialog code) in this file.
+u32 uc_crc, uc_dcrc;
+char uc_str[256];
+
+// Microcodes recognised by a known CRC or by an exact version string.
+// GBI_DetectMicrocode() compares the CRC column first and the text column last.
+// NOTE(review): the initializer order looks like {type, NoN, crc, text} -
+// confirm against the SpecialMicrocodeInfo declaration.
+SpecialMicrocodeInfo specialMicrocodes[] =
+{
+    {F3DWRUS, FALSE, 0xd17906e2, "RSP SW Version: 2.0D, 04-01-96"},
+    {F3DWRUS, FALSE,  0x94c4c833, "RSP SW Version: 2.0D, 04-01-96"},
+    {S2DEX, FALSE, 0x9df31081, "RSP Gfx ucode S2DEX  1.06 Yoshitaka Yasumoto Nintendo."},
+    {F3DDKR, FALSE, 0x8d91244f, "Diddy Kong Racing"},
+    {F3DDKR, FALSE, 0x6e6fc893, "Diddy Kong Racing"},
+    {F3DDKR, FALSE, 0xbde9d1fb, "Jet Force Gemini"},
+    {F3DPD, FALSE, 0x1c4f7869, "Perfect Dark"},
+    {F3DEX, FALSE, 0x0ace4c3f, "Mario Kart"},
+    //{F3DEX, FALSE, 0xda51ccdb, "Rogue Squadron"},
+    //{F3DCBFD, FALSE, 0x1b4ace88, "RSP Gfx ucode F3DEXBG.NoN fifo 2.08  Yoshitaka Yasumoto 1999 Nintendo."},
+};
+
+// Microcode-dependent GBI command ids. They are variables rather than
+// #defines and nothing in this file assigns them.
+// NOTE(review): presumably the *_Init() routine selected in
+// GBI_MakeCurrent() fills them in - confirm in the per-microcode sources.
+u32 G_RDPHALF_1, G_RDPHALF_2, G_RDPHALF_CONT;
+u32 G_SPNOOP;
+u32 G_SETOTHERMODE_H, G_SETOTHERMODE_L;
+u32 G_DL, G_ENDDL, G_CULLDL, G_BRANCH_Z;
+u32 G_LOAD_UCODE;
+u32 G_MOVEMEM, G_MOVEWORD;
+u32 G_MTX, G_POPMTX;
+u32 G_GEOMETRYMODE, G_SETGEOMETRYMODE, G_CLEARGEOMETRYMODE;
+u32 G_TEXTURE;
+u32 G_DMA_IO, G_DMA_DL, G_DMA_TRI, G_DMA_MTX, G_DMA_VTX, G_DMA_OFFSETS;
+u32 G_SPECIAL_1, G_SPECIAL_2, G_SPECIAL_3;
+u32 G_VTX, G_MODIFYVTX, G_VTXCOLORBASE;
+u32 G_TRI1, G_TRI2, G_TRI4;
+u32 G_QUAD, G_LINE3D;
+u32 G_RESERVED0, G_RESERVED1, G_RESERVED2, G_RESERVED3;
+u32 G_SPRITE2D_BASE;
+u32 G_BG_1CYC, G_BG_COPY;
+u32 G_OBJ_RECTANGLE, G_OBJ_SPRITE, G_OBJ_MOVEMEM;
+u32 G_SELECT_DL, G_OBJ_RENDERMODE, G_OBJ_RECTANGLE_R;
+u32 G_OBJ_LOADTXTR, G_OBJ_LDTX_SPRITE, G_OBJ_LDTX_RECT, G_OBJ_LDTX_RECT_R;
+u32 G_RDPHALF_0, G_TRI_UNKNOWN;
+
+// Matrix command flags (same remark: assigned outside this file).
+u32 G_MTX_STACKSIZE;
+u32 G_MTX_MODELVIEW;
+u32 G_MTX_PROJECTION;
+u32 G_MTX_MUL;
+u32 G_MTX_LOAD;
+u32 G_MTX_NOPUSH;
+u32 G_MTX_PUSH;
+
+// Geometry mode bits.
+u32 G_TEXTURE_ENABLE;
+u32 G_SHADING_SMOOTH;
+u32 G_CULL_FRONT;
+u32 G_CULL_BACK;
+u32 G_CULL_BOTH;
+u32 G_CLIPPING;
+
+u32 G_MV_VIEWPORT;
+
+// Per-light move-word offsets.
+u32 G_MWO_aLIGHT_1, G_MWO_bLIGHT_1;
+u32 G_MWO_aLIGHT_2, G_MWO_bLIGHT_2;
+u32 G_MWO_aLIGHT_3, G_MWO_bLIGHT_3;
+u32 G_MWO_aLIGHT_4, G_MWO_bLIGHT_4;
+u32 G_MWO_aLIGHT_5, G_MWO_bLIGHT_5;
+u32 G_MWO_aLIGHT_6, G_MWO_bLIGHT_6;
+u32 G_MWO_aLIGHT_7, G_MWO_bLIGHT_7;
+u32 G_MWO_aLIGHT_8, G_MWO_bLIGHT_8;
+
+//GBIFunc GBICmd[256];
+// Global microcode state used throughout this file: the microcode list
+// (top/bottom), the active entry (current), the entry count and the command
+// dispatch table (cmd[]).
+GBIInfo GBI;
+
+// Default handler stored in every GBI.cmd[] slot by GBI_Init() and
+// GBI_MakeCurrent(): it ignores the command.
+void GBI_Unknown( u32 w0, u32 w1 )
+{
+    // Both command words are intentionally unused.
+    (void)w0;
+    (void)w1;
+}
+
+#if 0
+// Win32 dialog procedure for the "unknown microcode" chooser.
+// It sits in an `#if 0` branch, so it is never compiled.
+// Returns TRUE for the messages it handles and FALSE otherwise.
+INT_PTR CALLBACK MicrocodeDlgProc( HWND hWndDlg, UINT uMsg, WPARAM wParam, LPARAM lParam )
+{
+    switch (uMsg)
+    {
+        case WM_INITDIALOG:
+            // Fill the combo box with the selectable microcode names and
+            // preselect the first one.
+            for (int i = 0; i < numMicrocodeTypes; i++)
+            {
+                SendDlgItemMessage( hWndDlg, IDC_MICROCODE, CB_ADDSTRING, 0, (LPARAM)MicrocodeTypes[i] );
+            }
+            SendDlgItemMessage( hWndDlg, IDC_MICROCODE, CB_SETCURSEL, 0, 0 );
+
+            // Show the identification data gathered for this microcode.
+            char text[1024];
+            sprintf( text, "Microcode CRC:\t\t0x%08x\r\nMicrocode Data CRC:\t0x%08x\r\nMicrocode Text:\t\t%s", uc_crc, uc_dcrc, uc_str );
+            SendDlgItemMessage( hWndDlg, IDC_TEXTBOX, WM_SETTEXT, NULL, (LPARAM)text );
+            return TRUE;
+
+        case WM_COMMAND:
+            switch (LOWORD(wParam))
+            {
+                case IDOK:
+                    // The dialog result is the selected combo box index.
+                    EndDialog( hWndDlg, SendDlgItemMessage( hWndDlg, IDC_MICROCODE, CB_GETCURSEL, 0, 0 ) );
+                    return TRUE;
+
+                case IDCANCEL:
+                    // Cancel reports NONE as the result.
+                    EndDialog( hWndDlg, NONE );
+                    return TRUE;
+            }
+            break;
+    }
+
+    return FALSE;
+}
+#elif defined(USE_GTK)
+static int selectedMicrocode = -1;
+static GtkWidget *microcodeWindow = 0;
+static GtkWidget *microcodeList = 0;
+
+static void okButton_clicked( GtkWidget *widget, void *data )
+{
+    gtk_widget_hide( microcodeWindow );
+    if (GTK_LIST(microcodeList)->selection != 0)
+    {
+        char *text = 0;
+        GtkListItem *item = GTK_LIST_ITEM(GTK_LIST(microcodeList)->selection->data);
+        GtkLabel *label = GTK_LABEL(GTK_BIN(item)->child);
+        gtk_label_get( label, &text );
+        if (text != 0)
+            for (int i = 0; i < numMicrocodeTypes; i++)
+                if (!strcmp( text, MicrocodeTypes[i] ))
+                {
+                    selectedMicrocode = i;
+                    return;
+                }
+    }
+
+    selectedMicrocode = NONE;
+}
+
+// "Stop" handler: hides the dialog and reports NONE as the choice.
+static void stopButton_clicked( GtkWidget *widget, void *data )
+{
+    (void)widget;
+    (void)data;
+
+    gtk_widget_hide( microcodeWindow );
+    selectedMicrocode = NONE;
+}
+
+// "delete_event" handler: always answers TRUE, which keeps the dialog from
+// being deleted.
+static gint delete_question_event( GtkWidget *widget, GdkEvent *event, gpointer data )
+{
+    (void)widget;
+    (void)event;
+    (void)data;
+
+    return TRUE; // undeleteable
+}
+
+static int MicrocodeDialog()
+{
+    GtkWidget *infoLabel;
+    GtkWidget *infoFrame, *infoTable;
+    GtkWidget *crcInfoLabel, *crcDataInfoLabel, *textInfoLabel;
+    GtkWidget *crcLabel = NULL, *crcDataLabel = NULL, *textLabel = NULL;
+    GtkWidget *selectUcodeLabel;
+    //GtkWidget *microcodeLabel;
+    GtkWidget *okButton, *stopButton;
+    GList *ucodeList = 0;
+    char buf[1024];
+
+    if (!g_thread_supported())
+        g_thread_init( NULL );
+    gdk_threads_enter();
+
+    // create dialog
+    if (microcodeWindow == 0)
+    {
+        microcodeWindow = gtk_dialog_new();
+        gtk_signal_connect( GTK_OBJECT(microcodeWindow), "delete_event",
+                            GTK_SIGNAL_FUNC(delete_question_event), (gpointer)NULL );
+        sprintf( buf, "%s - unknown microcode", pluginName );
+        gtk_window_set_title( GTK_WINDOW(microcodeWindow), buf );
+        gtk_container_set_border_width( GTK_CONTAINER(GTK_DIALOG(microcodeWindow)->vbox), 11 );
+
+        // ok button
+        okButton = gtk_button_new_with_label( "Ok" );
+        gtk_signal_connect_object( GTK_OBJECT(okButton), "clicked",
+                               GTK_SIGNAL_FUNC(okButton_clicked), NULL );
+        gtk_container_add( GTK_CONTAINER(GTK_DIALOG(microcodeWindow)->action_area), okButton );
+
+        // stop button
+        stopButton = gtk_button_new_with_label( "Stop" );
+        gtk_signal_connect_object( GTK_OBJECT(stopButton), "clicked",
+                               GTK_SIGNAL_FUNC(stopButton_clicked), NULL );
+        gtk_container_add( GTK_CONTAINER(GTK_DIALOG(microcodeWindow)->action_area), stopButton );
+
+        // info label
+        infoLabel = gtk_label_new( "Unknown microcode. Please notify Orkin, including the following information:" );
+        gtk_box_pack_start_defaults( GTK_BOX(GTK_DIALOG(microcodeWindow)->vbox), infoLabel );
+
+        // info frame
+        infoFrame = gtk_frame_new( "Microcode info" );
+        gtk_container_set_border_width( GTK_CONTAINER(infoFrame), 7 );
+        gtk_box_pack_start_defaults( GTK_BOX(GTK_DIALOG(microcodeWindow)->vbox), infoFrame );
+
+        infoTable = gtk_table_new( 3, 2, FALSE );
+        gtk_container_set_border_width( GTK_CONTAINER(infoTable), 7 );
+        gtk_table_set_col_spacings( GTK_TABLE(infoTable), 3 );
+        gtk_table_set_row_spacings( GTK_TABLE(infoTable), 3 );
+        gtk_container_add( GTK_CONTAINER(infoFrame), infoTable );
+
+        crcInfoLabel = gtk_label_new( "Microcode CRC:" );
+        crcDataInfoLabel = gtk_label_new( "Microcode Data CRC:" );
+        textInfoLabel = gtk_label_new( "Microcode Text:" );
+
+        crcLabel = gtk_label_new( "" );
+        crcDataLabel = gtk_label_new( "" );
+        textLabel = gtk_label_new( "" );
+
+        gtk_table_attach_defaults( GTK_TABLE(infoTable), crcInfoLabel, 0, 1, 0, 1 );
+        gtk_table_attach_defaults( GTK_TABLE(infoTable), crcLabel, 1, 2, 0, 1 );
+        gtk_table_attach_defaults( GTK_TABLE(infoTable), crcDataInfoLabel, 0, 1, 1, 2 );
+        gtk_table_attach_defaults( GTK_TABLE(infoTable), crcDataLabel, 1, 2, 1, 2 );
+        gtk_table_attach_defaults( GTK_TABLE(infoTable), textInfoLabel, 0, 1, 2, 3 );
+        gtk_table_attach_defaults( GTK_TABLE(infoTable), textLabel, 1, 2, 2, 3 );
+
+        selectUcodeLabel = gtk_label_new( "You can manually select the closest matching microcode." );
+        for (int i = 0; i < numMicrocodeTypes; i++)
+            ucodeList = g_list_append( ucodeList, gtk_list_item_new_with_label( MicrocodeTypes[i] ) );
+        microcodeList = gtk_list_new();
+        gtk_list_set_selection_mode( GTK_LIST(microcodeList), GTK_SELECTION_SINGLE );
+        gtk_list_append_items( GTK_LIST(microcodeList), ucodeList );
+
+        gtk_box_pack_start_defaults( GTK_BOX(GTK_DIALOG(microcodeWindow)->vbox), selectUcodeLabel );
+        gtk_box_pack_start_defaults( GTK_BOX(GTK_DIALOG(microcodeWindow)->vbox), microcodeList );
+    }
+
+    snprintf( buf, 1024, "0x%8.8X", (unsigned int)uc_crc );
+        if(crcLabel) gtk_label_set_text( GTK_LABEL(crcLabel), buf );
+    snprintf( buf, 1024, "0x%8.8X", (unsigned int)uc_dcrc );
+    if(crcDataLabel) gtk_label_set_text( GTK_LABEL(crcDataLabel), buf );
+    if(textLabel) gtk_label_set_text( GTK_LABEL(textLabel), uc_str );
+
+    selectedMicrocode = -1;
+    gtk_widget_show_all( microcodeWindow );
+
+    while (selectedMicrocode == -1)
+    {
+        if( gtk_main_iteration() )
+            break;
+        usleep( 10000 );
+    }
+    gdk_threads_leave();
+
+    return selectedMicrocode;
+}
+#else
+// Fallback used when no dialog implementation is compiled in: nothing is
+// shown and microcode type 0 (F3D) is reported.
+static int MicrocodeDialog()
+{
+    // FIXME
+    return F3D;
+}
+#endif
+
+// Allocate a MicrocodeInfo and link it in as the new top of the microcode
+// list (it also becomes the bottom when the list was empty).
+//
+// Only the list links are initialized; the caller fills in the remaining
+// fields. Returns the new entry, or NULL when the allocation fails, in which
+// case the list is left untouched.
+MicrocodeInfo *GBI_AddMicrocode()
+{
+    MicrocodeInfo *newtop = (MicrocodeInfo*)malloc( sizeof( MicrocodeInfo ) );
+
+    // Do not touch the list through a NULL pointer when out of memory.
+    if (!newtop)
+        return NULL;
+
+    newtop->lower = GBI.top;
+    newtop->higher = NULL;
+
+    if (GBI.top)
+        GBI.top->higher = newtop;
+
+    if (!GBI.bottom)
+        GBI.bottom = newtop;
+
+    GBI.top = newtop;
+
+    GBI.numMicrocodes++;
+
+    return newtop;
+}
+
+// Reset the global GBI state: every command slot points at GBI_Unknown, the
+// microcode list is empty and no microcode is current.
+void GBI_Init()
+{
+    for (u32 cmd = 0; cmd < 256; cmd++)
+        GBI.cmd[cmd] = GBI_Unknown;
+
+    GBI.numMicrocodes = 0;
+    GBI.current = NULL;
+    GBI.bottom = NULL;
+    GBI.top = NULL;
+
+#ifdef PROFILE_GBI
+    GBI_ProfileInit();
+#endif
+}
+
+void GBI_Destroy()
+{
+    while (GBI.bottom)
+    {
+        MicrocodeInfo *newBottom = GBI.bottom->higher;
+
+        if (GBI.bottom == GBI.top)
+            GBI.top = NULL;
+
+        free( GBI.bottom );
+
+        GBI.bottom = newBottom;
+
+        if (GBI.bottom)
+            GBI.bottom->lower = NULL;
+
+        GBI.numMicrocodes--;
+    }
+}
+
+#ifdef PROFILE_GBI
+void GBI_ProfileInit()
+{
+    GBI_ProfileReset();
+}
+
+void GBI_ProfileBegin(u32 cmd)
+{
+    GBI.profileTmp = ticksGetTicks();
+}
+
+void GBI_ProfileEnd(u32 cmd)
+{
+    unsigned int i = 256*GBI.current->type + cmd;
+    GBI.profileNum[i]++;
+    GBI.profileTimer[i] += ticksGetTicks() - GBI.profileTmp;
+}
+
+void
+GBI_ProfileReset()
+{
+    memset(GBI.profileTimer, 0, 12 * 256 * sizeof(int));
+    memset(GBI.profileNum, 0, 12 * 256 * sizeof(int));
+}
+
+u32
+GBI_GetFuncTime(u32 ucode, u32 cmd)
+{
+    return GBI.profileTimer[ucode*256+cmd];
+}
+
+u32
+GBI_GetFuncNum(u32 ucode, u32 cmd)
+{
+    return GBI.profileNum[ucode*256+cmd];
+}
+
+u32
+GBI_ProfilePrint(FILE *file)
+{
+    int uc, cmd, total=0;
+
+    for(uc=0;uc<12;uc++)
+    {
+        for(cmd=0;cmd<256;cmd++)
+        {
+            total += GBI_GetFuncTime(uc, cmd);
+        }
+    }
+
+
+    for(uc=0;uc<12;uc++)
+    {
+        for(cmd=0;cmd<256;cmd++)
+        {
+            unsigned int t = GBI_GetFuncTime(uc, cmd);
+            if (t != 0)
+            {
+                fprintf(file, "%s x %i = %u ms (%.2f%%)\n", GBI_GetFuncName(uc,cmd), GBI_GetFuncNum(uc, cmd), t, 100.0f * (float)t / total);
+            }
+        }
+    }
+    return total;
+}
+
+const char*
+GBI_GetUcodeName(u32 ucode)
+{
+    switch(ucode)
+    {
+        case F3D:       return "F3D";
+        case F3DEX:     return "F3DEX";
+        case F3DEX2:    return "F3DEX2";
+        case L3D:       return "L3D";
+        case L3DEX:     return "L3DEX";
+        case L3DEX2:    return "L3DEX2";
+        case S2DEX:     return "S2DEX";
+        case S2DEX2:    return "S2DEX2";
+        case F3DPD:     return "F3DPD";
+        case F3DDKR:    return "F3DDKR";
+        case F3DWRUS:   return "F3DWRUS";
+        case NONE:      return "NONE";
+        default:        return "UNKNOWN UCODE";
+    }
+}
+
+const char*
+GBI_GetFuncName(unsigned int ucode, unsigned int cmd)
+{
+    switch(cmd)
+    {
+        //common
+        case G_SETCIMG:             return "G_SETCIMG";
+        case G_SETZIMG:             return "G_SETZIMG";
+        case G_SETTIMG:             return "G_SETTIMG";
+        case G_SETCOMBINE:          return "G_SETCOMBINE";
+        case G_SETENVCOLOR:         return "G_SETENVCOLOR";
+        case G_SETPRIMCOLOR:        return "G_SETPRIMCOLOR";
+        case G_SETBLENDCOLOR:       return "G_SETBLENDCOLOR";
+        case G_SETFOGCOLOR:         return "G_SETFOGCOLOR";
+        case G_SETFILLCOLOR:        return "G_SETFILLCOLOR";
+        case G_FILLRECT:            return "G_FILLRECT";
+        case G_SETTILE:             return "G_SETTILE";
+        case G_LOADTILE:            return "G_LOADTILE";
+        case G_LOADBLOCK:           return "G_LOADBLOCK";
+        case G_SETTILESIZE:         return "G_SETTILESIZE";
+        case G_LOADTLUT:            return "G_LOADTLUT";
+        case G_RDPSETOTHERMODE:     return "G_RDPSETOTHERMODE";
+        case G_SETPRIMDEPTH:        return "G_SETPRIMDEPTH";
+        case G_SETSCISSOR:          return "G_SETSCISSOR";
+        case G_SETCONVERT:          return "G_SETCONVERT";
+        case G_SETKEYR:             return "G_SETKEYR";
+        case G_SETKEYGB:            return "G_SETKEYGB";
+        case G_RDPFULLSYNC:         return "G_RDPFULLSYNC";
+        case G_RDPTILESYNC:         return "G_RDPTILESYNC";
+        case G_RDPPIPESYNC:         return "G_RDPPIPESYNC";
+        case G_RDPLOADSYNC:         return "G_RDPLOADSYNC";
+        case G_TEXRECTFLIP:         return "G_TEXRECTFLIP";
+
+        //ucode
+        case 0x00:                  return "SPNOOP";
+
+/*
+        F3D_MTX:                0x01
+        F3DEX2_VTX              0x01
+        F3DDKR_DMA_MTX          0x01
+        S2DEX_BG_1CYC           0x01
+        S2DEX2_OBJ_RECTANGLE    0x01
+*/
+        case 0x01:
+        {
+            switch(ucode)
+            {
+                case F3DEX2:        return "F3DEX2_VTX";
+                case F3DDKR:        return "F3DDKR_DMA_MTX";
+                case S2DEX:         return "S2DEX_BG_1CYC";
+                case S2DEX2:        return "S2DEX2_OBJ_RECTANGLE";
+                default:            return "F3D_MTX";
+            }
+        }
+/*
+        F3D_RESERVED0:          0x02
+        F3DEX2_MODIFYVTX        0x02
+        S2DEX_BG_COPY           0x02
+        S2DEX2_OBJ_SPRITE       0x02
+*/
+        case 0x02:
+        {
+            switch(ucode)
+            {
+                case F3DEX2:        return "F3DEX2_MODIFYVTX";
+                case S2DEX:         return "S2DEX_BG_COPY";
+                case S2DEX2:        return "S2DEX2_OBJ_SPRITE";
+                default:            return "F3D_RESERVED0";
+            }
+        }
+/*
+        F3D_MOVEMEM:            0x03
+        F3DEX2_CULLDL           0x03
+        S2DEX_OBJ_RECTANGLE     0x03
+*/
+        case 0x03:
+        {
+            switch(ucode)
+            {
+                case F3DEX2:        return "F3DEX2_CULLDL";
+                case S2DEX:         return "S2DEX_OBJ_RECTANGLE";
+                default:            return "F3D_MOVEMEM";
+            }
+        }
+/*
+        F3D_VTX:                0x04
+        F3DEX2_BRANCH_Z         0x04
+        F3DDKR_DMA_VTX          0x04
+        S2DEX_OBJ_SPRITE        0x04
+        S2DEX2_SELECT_DL        0x04
+*/
+        case 0x04:
+        {
+            switch(ucode)
+            {
+                case F3DEX2:        return "F3DEX2_BRANCH_Z";
+                case F3DDKR:        return "F3DDKR_DMA_VTX";
+                case S2DEX:         return "S2DEX_OBJ_SPRITE";
+                case S2DEX2:        return "S2DEX2_SELECT_DL";
+                default:            return "F3D_VTX";
+            }
+        }
+
+/*
+        F3D_RESERVED1:          0x05
+        F3DEX2_TRI1             0x05
+        F3DDKR_DMA_TRI          0x05
+        S2DEX_OBJ_MOVEMEM       0x05
+        S2DEX2_OBJ_LOADTXTR     0x05
+*/
+        case 0x05:
+        {
+            switch(ucode)
+            {
+                case F3DEX2:        return "F3DEX2_TR1";
+                case F3DDKR:        return "F3DDKR_DMA_TRI";
+                case S2DEX:         return "S2DEX_OBJ_MOVEMEM";
+                case S2DEX2:        return "S2DEX2_OBJ_LOADTXTR";
+                default:            return "F3D_RESERVED1";
+            }
+        }
+/*
+        F3D_DL:                 0x06
+        F3DEX2_TRI2             0x06
+        S2DEX2_OBJ_LDTX_SPRITE  0x06
+*/
+        case 0x06:
+        {
+            switch(ucode)
+            {
+                case F3DEX2:        return "F3DEX2_TR2";
+                case S2DEX2:        return "S2DEX2_OBJ_LDTX_SPRITE";
+                default:            return "F3D_DL";
+            }
+        }
+
+/*
+        F3D_RESERVED2:          0x07
+        F3DEX2_QUAD             0x07
+        F3DPD_VTXCOLORBASE      0x07
+        F3DDKR_DMA_DL           0x07
+        S2DEX2_OBJ_LDTX_RECT    0x07
+*/
+        case 0x07:
+        {
+            switch(ucode)
+            {
+                case F3DEX2:        return "F3DEX2_QUAD";
+                case F3DPD:         return "F3DPD_VTXCOLORBASE";
+                case F3DDKR:        return "F3DDKR_DMA_DL";
+                case S2DEX2:        return "S2DEX2_OBJ_LDTX_RECT";
+                default:            return "F3D_RESERVED2";
+            }
+        }
+/*
+        F3D_RESERVED3:          0x08
+        L3DEX2_LINE3D           0x08
+        S2DEX2_OBJ_LDTX_RECT_R  0x08
+*/
+        case 0x08:
+        {
+            switch(ucode)
+            {
+                case L3DEX2:        return "L3DEX2_LINE3D";
+                case S2DEX2:        return "S2DEX2_OBJ_LDTX_RECT_R";
+                default:            return "F3D_RESERVED3";
+            }
+        }
+
+/*
+        F3D_SPRITE2D_BASE:      0x09
+        S2DEX2_BG_1CYC          0x09
+*/
+        case 0x09:
+        {
+            switch(ucode)
+            {
+                case S2DEX2:        return "S2DEX2_BG_1CYC";
+                default:            return "F3D_SPRITE2D_BASE";
+            }
+        }
+
+//        S2DEX2_BG_COPY          0x0A
+        case 0x0A:                  return "S2DEX2_BG_COPY";
+//        S2DEX2_OBJ_RENDERMODE   0x0B
+        case 0x0B:                  return "S2DEX2_OBJ_RENDERMODE";
+//        F3DEX2_RDPHALF_2        0xF1
+        case 0xF1:                  return "F3DEX2_RDPHALF_2";
+/*
+        S2DEX_RDPHALF_0         0xE4
+        S2DEX2_RDPHALF_0        0xE4
+*/
+        case 0xE4:
+        {
+            switch(ucode)
+            {
+                case S2DEX:         return "S2DEX_RDPHALF_0";
+                case S2DEX2:        return "S2DEX2_RDPHALF_0";
+                default:            return "G_TEXRECT";
+            }
+        }
+//        F3DEX2_SETOTHERMODE_H   0xE3
+        case 0xE3:                  return "F3DEX2_SETOTHERMODE_H";
+//        F3DEX2_SETOTHERMODE_L   0xE2
+        case 0xE2:                  return "F3DEX2_SETOTHERMODE_L";
+//        F3DEX2_RDPHALF_1        0xE1
+        case 0xE1:                  return "F3DEX2_RDPHALF_1";
+//        F3DEX2_SPNOOP           0xE0
+        case 0xE0:                  return "F3DEX2_SPNOOP";
+//        F3DEX2_ENDDL            0xDF
+        case 0xDF:                  return "F3DEX2_ENDDL";
+//        F3DEX2_DL               0xDE
+        case 0xDE:                  return "F3DEX2_DL";
+//        F3DEX2_LOAD_UCODE       0xDD
+        case 0xDD:                  return "F3DEX2_LOAD_UCODE";
+/*
+        F3DEX2_MOVEMEM          0xDC
+        S2DEX2_OBJ_MOVEMEM      0xDC
+*/
+        case 0xDC:
+        {
+            switch(ucode)
+            {
+                case S2DEX2:        return "S2DEX2_OBJ_MOVEMEM";
+                default:            return "F3DEX2_MOVEMEM";
+            }
+        }
+//        F3DEX2_MOVEWORD         0xDB
+        case 0xDB:                  return "F3DEX2_MOVEWORD";
+/*
+        F3DEX2_MTX              0xDA
+        S2DEX2_OBJ_RECTANGLE_R  0xDA
+*/
+        case 0xDA:
+        {
+            switch(ucode)
+            {
+                case S2DEX2:        return "S2DEX2_OBJ_RECTANGLE_R";
+                default:            return "F3DEX2_MTX";
+            }
+        }
+//        F3DEX2_GEOMETRYMODE     0xD9
+        case 0xD9:                  return "F3DEX2_GEOMETRYMODE";
+//        F3DEX2_POPMTX           0xD8
+        case 0xD8:                  return "F3DEX2_POPMTX";
+//        F3DEX2_TEXTURE          0xD7
+        case 0xD7:                  return "F3DEX2_TEXTURE";
+//        F3DEX2_DMA_IO           0xD6
+        case 0xD6:                  return "F3DEX2_DMA_IO";
+//        F3DEX2_SPECIAL_1        0xD5
+        case 0xD5:                  return "F3DEX2_SPECIAL_1";
+//        F3DEX2_SPECIAL_2        0xD4
+        case 0xD4:                  return "F3DEX2_SPECIAL_2";
+//        F3DEX2_SPECIAL_3        0xD3
+        case 0xD3:                  return "F3DEX2_SPECIAL_3";
+
+//        S2DEX_OBJ_LOADTXTR      0xC1
+        case 0xC1:                  return "S2DEX_OBJ_LOADTXTR";
+//        S2DEX_OBJ_LDTX_SPRITE   0xC2
+        case 0xC2:                  return "S2DEX_OBJ_LDTX_SPRITE";
+//        S2DEX_OBJ_LDTX_RECT     0xC3
+        case 0xC3:                  return "S2DEX_OBJ_LDTX_RECT";
+//        S2DEX_OBJ_LDTX_RECT_R   0xC4
+        case 0xC4:                  return "S2DEX_OBJ_LDTX_RECT_R";
+/*
+        F3D_TRI1:               0xBF
+        F3DDKR_DMA_OFFSETS      0xBF
+*/
+        case 0xBF:
+        {
+            switch(ucode)
+            {
+                case F3DDKR:        return "F3DDKR_DMA_OFFSETS";
+                default:            return "F3D_TRI1";
+            }
+        }
+
+//        F3D_CULLDL:             0xBE
+        case 0xBE:                  return "F3D_CULLDL";
+//        F3D_POPMTX:             0xBD
+        case 0xBD:                  return "F3D_POPMTX";
+//        F3D_MOVEWORD:           0xBC
+        case 0xBC:                  return "F3D_MOVEWORD";
+//        F3D_TEXTURE:            0xBB
+        case 0xBB:                  return "F3D_TEXTURE";
+//        F3D_SETOTHERMODE_H:     0xBA
+        case 0xBA:                  return "F3D_SETOTHERMODE_H";
+//        F3D_SETOTHERMODE_L:     0xB9
+        case 0xB9:                  return "F3D_SETOTHERMODE_L";
+//        F3D_ENDDL:              0xB8
+        case 0xB8:                  return "F3D_ENDDL";
+//        F3D_SETGEOMETRYMODE:    0xB7
+        case 0xB7:                  return "F3D_SETGEOMETRYMODE";
+//        F3D_CLEARGEOMETRYMODE:  0xB6
+        case 0xB6:                  return "F3D_CLEARGEOMETRYMODE";
+/*
+        F3D_QUAD:               0xB5
+        L3D_LINE3D              0xB5
+*/
+        case 0xB5:
+        {
+            switch(ucode)
+            {
+                case L3D:           return "L3D_LINE3D";
+                default:            return "F3D_QUAD";
+            }
+        }
+
+//        F3D_RDPHALF_1:          0xB4
+        case 0xB4:                  return "F3D_RDPHALF_1";
+//        F3D_RDPHALF_2:          0xB3
+        case 0xB3:                  return "F3D_RDPHALF_2";
+/*
+        F3D_RDPHALF_CONT:       0xB2
+        F3DEX_MODIFYVTX         0xB2
+        S2DEX_OBJ_RECTANGLE_R   0xB2
+*/
+        case 0xB2:
+        {
+            switch(ucode)
+            {
+                case F3DEX:         return "F3DEX_MODIFYVTX";
+                case S2DEX:         return "S2DEX_OBJ_RECTANGLE_R";
+                default:            return "F3D_RDPHALF_CONT";
+            }
+        }
+/*
+        F3D_TRI4:               0xB1
+        F3DEX_TRI2              0xB1
+        F3DWRUS_TRI2            0xB1
+        S2DEX_OBJ_RENDERMODE    0xB1
+*/
+        case 0xB1:
+        {
+            switch(ucode)
+            {
+                case F3DEX:         return "F3DEX_TRI2";
+                case F3DWRUS:       return "F3DWRUS_TRI2";
+                case S2DEX:         return "S2DEX_OBJ_RENDERMODE";
+                default:            return "F3D_TRI4";
+            }
+        }
+/*
+        F3DEX_BRANCH_Z          0xB0
+        S2DEX_SELECT_DL         0xB0
+*/
+        case 0xB0:
+        {
+            switch(ucode)
+            {
+                case S2DEX:         return "S2DEX_SELECT_DL";
+                default:            return "F3DEX_BRANCH_Z";
+            }
+        }
+/*
+        F3DEX_LOAD_UCODE        0xAF
+        S2DEX_LOAD_UCODE        0xAF
+*/
+        case 0xAF:
+        {
+            switch(ucode)
+            {
+                case S2DEX:         return "S2DEX_LOAD_UCODE";
+                default:            return "F3DEX_LOAD_UCODE";
+            }
+        }
+
+        default:
+        {
+            if (ucode == F3DCBFD)
+            {
+                if (cmd >= 0x10 && cmd <= 0x1f)
+                    return "F3DCBFD_TRI4";
+
+            }
+            return "UNKNOWN CMD";
+        }
+    }
+}
+#endif
+
+MicrocodeInfo *GBI_DetectMicrocode( u32 uc_start, u32 uc_dstart, u16 uc_dsize )
+{
+    MicrocodeInfo *current;
+
+    for (unsigned int i = 0; i < GBI.numMicrocodes; i++)
+    {
+        current = GBI.top;
+
+        while (current)
+        {
+            if ((current->address == uc_start) && (current->dataAddress == uc_dstart) && (current->dataSize == uc_dsize))
+                return current;
+
+            current = current->lower;
+        }
+    }
+
+    current = GBI_AddMicrocode();
+
+    current->address = uc_start;
+    current->dataAddress = uc_dstart;
+    current->dataSize = uc_dsize;
+    current->NoN = FALSE;
+    current->type = NONE;
+
+    // See if we can identify it by CRC
+    uc_crc = CRC_Calculate( 0xFFFFFFFF, &RDRAM[uc_start & 0x1FFFFFFF], 4096);
+    LOG(LOG_MINIMAL, "UCODE CRC=0x%x\n", uc_crc);
+
+    for (u32 i = 0; i < sizeof( specialMicrocodes ) / sizeof( SpecialMicrocodeInfo ); i++)
+    {
+        if (uc_crc == specialMicrocodes[i].crc)
+        {
+            current->type = specialMicrocodes[i].type;
+            return current;
+        }
+    }
+
+    // See if we can identify it by text
+    char uc_data[2048];
+    UnswapCopy( &RDRAM[uc_dstart & 0x1FFFFFFF], uc_data, 2048 );
+    strcpy( uc_str, "Not Found" );
+
+    for (u32 i = 0; i < 2048; i++)
+    {
+        if ((uc_data[i] == 'R') && (uc_data[i+1] == 'S') && (uc_data[i+2] == 'P'))
+        {
+            u32 j = 0;
+            while (uc_data[i+j] > 0x0A)
+            {
+                uc_str[j] = uc_data[i+j];
+                j++;
+            }
+
+            uc_str[j] = 0x00;
+
+            int type = NONE;
+
+            if (strncmp( &uc_str[4], "SW", 2 ) == 0)
+            {
+                type = F3D;
+            }
+            else if (strncmp( &uc_str[4], "Gfx", 3 ) == 0)
+            {
+                current->NoN = (strncmp( &uc_str[20], ".NoN", 4 ) == 0);
+
+                if (strncmp( &uc_str[14], "F3D", 3 ) == 0)
+                {
+                    if (uc_str[28] == '1')
+                        type = F3DEX;
+                    else if (uc_str[31] == '2')
+                        type = F3DEX2;
+                }
+                else if (strncmp( &uc_str[14], "L3D", 3 ) == 0)
+                {
+                    if (uc_str[28] == '1')
+                        type = L3DEX;
+                    else if (uc_str[31] == '2')
+                        type = L3DEX2;
+                }
+                else if (strncmp( &uc_str[14], "S2D", 3 ) == 0)
+                {
+                    if (uc_str[28] == '1')
+                        type = S2DEX;
+                    else if (uc_str[31] == '2')
+                        type = S2DEX2;
+                }
+            }
+
+            LOG(LOG_VERBOSE, "UCODE STRING=%s\n", uc_str);
+
+            if (type != NONE)
+            {
+                current->type = type;
+                return current;
+            }
+
+            break;
+        }
+    }
+
+
+    for (u32 i = 0; i < sizeof( specialMicrocodes ) / sizeof( SpecialMicrocodeInfo ); i++)
+    {
+        if (strcmp( uc_str, specialMicrocodes[i].text ) == 0)
+        {
+            current->type = specialMicrocodes[i].type;
+            return current;
+        }
+    }
+
+    // Let the user choose the microcode
+    LOG(LOG_ERROR, "[gles2n64]: Warning - unknown ucode!!!\n");
+    if(last_good_ucode != (u32)-1)
+    {
+        current->type=last_good_ucode;
+    }
+    else
+    {
+        current->type = MicrocodeDialog();
+    }
+    return current;
+}
+
+void GBI_MakeCurrent( MicrocodeInfo *current )
+{
+    if (current != GBI.top)
+    {
+        if (current == GBI.bottom)
+        {
+            GBI.bottom = current->higher;
+            GBI.bottom->lower = NULL;
+        }
+        else
+        {
+            current->higher->lower = current->lower;
+            current->lower->higher = current->higher;
+        }
+
+        current->higher = NULL;
+        current->lower = GBI.top;
+        GBI.top->higher = current;
+        GBI.top = current;
+    }
+
+    if (!GBI.current || (GBI.current->type != current->type))
+    {
+
+        for (int i = 0; i <= 0xFF; i++)
+            GBI.cmd[i] = GBI_Unknown;
+
+        RDP_Init();
+        switch (current->type)
+        {
+            case F3D:       F3D_Init();     break;
+            case F3DEX:     F3DEX_Init();   break;
+            case F3DEX2:    F3DEX2_Init();  break;
+            case L3D:       L3D_Init();     break;
+            case L3DEX:     L3DEX_Init();   break;
+            case L3DEX2:    L3DEX2_Init();  break;
+            case S2DEX:     S2DEX_Init();   break;
+            case S2DEX2:    S2DEX2_Init();  break;
+            case F3DDKR:    F3DDKR_Init();  break;
+            case F3DWRUS:   F3DWRUS_Init(); break;
+            case F3DPD:     F3DPD_Init();   break;
+            case F3DCBFD:   F3DCBFD_Init(); break;
+        }
+    }
+
+
+    GBI.current = current;
+}
+
diff --git a/source/gles2n64/src/GBI.h b/source/gles2n64/src/GBI.h
new file mode 100644 (file)
index 0000000..13bd6bb
--- /dev/null
@@ -0,0 +1,820 @@
+#ifndef GBI_H
+#define GBI_H
+#include "Hash.h"
+#include "Types.h"
+#include <stdio.h>
+
+// Microcode Types
+#define F3D         0
+#define F3DEX       1
+#define F3DEX2      2
+#define L3D         3
+#define L3DEX       4
+#define L3DEX2      5
+#define S2DEX       6
+#define S2DEX2      7
+#define F3DPD       8
+#define F3DDKR      9
+#define F3DWRUS     10
+#define F3DCBFD     11
+#define NONE        12
+
+// Display names of the microcode types, in the order of the type constants
+// defined above (index F3D .. NONE). The array is defined only where MAINDEF
+// is defined before this header is included; everywhere else it is declared
+// extern.
+#ifdef MAINDEF
+const char *MicrocodeTypes[] =
+{
+    "Fast3D",
+    "F3DEX",
+    "F3DEX2",
+    "Line3D",
+    "L3DEX",
+    "L3DEX2",
+    "S2DEX",
+    "S2DEX2",
+    "Perfect Dark",
+    "DKR/JFG",
+    "Waverace US",
+    "Conker's Bad Fur Day",
+    "None",
+};
+#else
+extern const char *MicrocodeTypes[];
+#endif
+
+// NOTE(review): MicrocodeTypes has 13 entries (12 microcodes plus "None")
+// while this count is 11 - possibly not updated when later types were added.
+// Confirm against the users of numMicrocodeTypes before changing it.
+static const int numMicrocodeTypes = 11;
+
+// Fixed point conversion factors: FIXED2FLOATRECIPn is 2^-n
+#define FIXED2FLOATRECIP1   0.5f
+#define FIXED2FLOATRECIP2   0.25f
+#define FIXED2FLOATRECIP3   0.125f
+#define FIXED2FLOATRECIP4   0.0625f
+#define FIXED2FLOATRECIP5   0.03125f
+#define FIXED2FLOATRECIP6   0.015625f
+#define FIXED2FLOATRECIP7   0.0078125f
+#define FIXED2FLOATRECIP8   0.00390625f
+#define FIXED2FLOATRECIP9   0.001953125f
+#define FIXED2FLOATRECIP10  0.0009765625f
+#define FIXED2FLOATRECIP11  0.00048828125f
+#define FIXED2FLOATRECIP12  0.00024414063f
+#define FIXED2FLOATRECIP13  0.00012207031f
+#define FIXED2FLOATRECIP14  6.1035156e-05f
+#define FIXED2FLOATRECIP15  3.0517578e-05f
+#define FIXED2FLOATRECIP16  1.5258789e-05f
+
+// Converts the fixed-point value v with b fractional bits to f32.
+// b must be a literal 1..16 because it is pasted onto FIXED2FLOATRECIP.
+// v is parenthesised so that an expression argument is converted as a whole.
+#define _FIXED2FLOAT( v, b ) \
+    ((f32)(v) * FIXED2FLOATRECIP##b)
+
+// Useful macros for decoding GBI command's parameters
+//  _SHIFTL: keeps the low w bits of v and moves them up to bit position s
+//  _SHIFTR: extracts the w-bit field of v that starts at bit position s
+// Every argument is parenthesised so that expressions can be passed safely,
+// and the mask is built from an unsigned 1 so that w == 31 does not overflow
+// a signed int. w must stay below 32.
+#define _SHIFTL( v, s, w )  \
+    (((u32)(v) & ((0x01u << (w)) - 1)) << (s))
+#define _SHIFTR( v, s, w )  \
+    (((u32)(v) >> (s)) & ((0x01u << (w)) - 1))
+
+// BG flags
+#define G_BGLT_LOADBLOCK    0x0033
+#define G_BGLT_LOADTILE     0xfff4
+
+#define G_BG_FLAG_FLIPS     0x01
+#define G_BG_FLAG_FLIPT     0x10
+
+// Sprite object render modes
+#define G_OBJRM_NOTXCLAMP       0x01
+#define G_OBJRM_XLU             0x02    /* Ignored */
+#define G_OBJRM_ANTIALIAS       0x04    /* Ignored */
+#define G_OBJRM_BILERP          0x08
+#define G_OBJRM_SHRINKSIZE_1    0x10
+#define G_OBJRM_SHRINKSIZE_2    0x20
+#define G_OBJRM_WIDEN           0x40
+
+// Sprite texture loading types
+#define G_OBJLT_TXTRBLOCK   0x00001033
+#define G_OBJLT_TXTRTILE    0x00fc1034
+#define G_OBJLT_TLUT        0x00000030
+
+
+// These are all the constant flags
+#define G_ZBUFFER               0x00000001
+#define G_SHADE                 0x00000004
+#define G_FOG                   0x00010000
+#define G_LIGHTING              0x00020000
+#define G_TEXTURE_GEN           0x00040000
+#define G_TEXTURE_GEN_LINEAR    0x00080000
+#define G_LOD                   0x00100000
+
+#define G_MV_MMTX       2
+#define G_MV_PMTX       6
+#define G_MV_LIGHT      10
+#define G_MV_POINT      12
+#define G_MV_MATRIX     14
+
+#define G_MVO_LOOKATX   0
+#define G_MVO_LOOKATY   24
+#define G_MVO_L0        48
+#define G_MVO_L1        72
+#define G_MVO_L2        96
+#define G_MVO_L3        120
+#define G_MVO_L4        144
+#define G_MVO_L5        168
+#define G_MVO_L6        192
+#define G_MVO_L7        216
+
+#define G_MV_LOOKATY    0x82
+#define G_MV_LOOKATX    0x84
+#define G_MV_L0         0x86
+#define G_MV_L1         0x88
+#define G_MV_L2         0x8a
+#define G_MV_L3         0x8c
+#define G_MV_L4         0x8e
+#define G_MV_L5         0x90
+#define G_MV_L6         0x92
+#define G_MV_L7         0x94
+#define G_MV_TXTATT     0x96
+#define G_MV_MATRIX_1   0x9E
+#define G_MV_MATRIX_2   0x98
+#define G_MV_MATRIX_3   0x9A
+#define G_MV_MATRIX_4   0x9C
+
+// G_MW_* values
+#define G_MW_MATRIX         0x00
+#define G_MW_NUMLIGHT       0x02
+#define G_MW_CLIP           0x04
+#define G_MW_SEGMENT        0x06
+#define G_MW_FOG            0x08
+#define G_MW_LIGHTCOL       0x0A
+#define G_MW_FORCEMTX       0x0C
+#define G_MW_POINTS         0x0C    // same value as G_MW_FORCEMTX
+#define G_MW_PERSPNORM      0x0E
+// NOTE(review): carries the G_MV_ prefix although it is listed with the
+// G_MW_ values - presumably belongs to this group; confirm against its users.
+#define G_MV_COORDMOD       0x10    // Conker's Bad Fur Day
+
+#define G_MWO_NUMLIGHT      0x00
+#define G_MWO_CLIP_RNX      0x04
+#define G_MWO_CLIP_RNY      0x0c
+#define G_MWO_CLIP_RPX      0x14
+#define G_MWO_CLIP_RPY      0x1c
+#define G_MWO_SEGMENT_0     0x00
+#define G_MWO_SEGMENT_1     0x01
+#define G_MWO_SEGMENT_2     0x02
+#define G_MWO_SEGMENT_3     0x03
+#define G_MWO_SEGMENT_4     0x04
+#define G_MWO_SEGMENT_5     0x05
+#define G_MWO_SEGMENT_6     0x06
+#define G_MWO_SEGMENT_7     0x07
+#define G_MWO_SEGMENT_8     0x08
+#define G_MWO_SEGMENT_9     0x09
+#define G_MWO_SEGMENT_A     0x0a
+#define G_MWO_SEGMENT_B     0x0b
+#define G_MWO_SEGMENT_C     0x0c
+#define G_MWO_SEGMENT_D     0x0d
+#define G_MWO_SEGMENT_E     0x0e
+#define G_MWO_SEGMENT_F     0x0f
+#define G_MWO_FOG           0x00
+
+#define G_MWO_MATRIX_XX_XY_I    0x00
+#define G_MWO_MATRIX_XZ_XW_I    0x04
+#define G_MWO_MATRIX_YX_YY_I    0x08
+#define G_MWO_MATRIX_YZ_YW_I    0x0C
+#define G_MWO_MATRIX_ZX_ZY_I    0x10
+#define G_MWO_MATRIX_ZZ_ZW_I    0x14
+#define G_MWO_MATRIX_WX_WY_I    0x18
+#define G_MWO_MATRIX_WZ_WW_I    0x1C
+#define G_MWO_MATRIX_XX_XY_F    0x20
+#define G_MWO_MATRIX_XZ_XW_F    0x24
+#define G_MWO_MATRIX_YX_YY_F    0x28
+#define G_MWO_MATRIX_YZ_YW_F    0x2C
+#define G_MWO_MATRIX_ZX_ZY_F    0x30
+#define G_MWO_MATRIX_ZZ_ZW_F    0x34
+#define G_MWO_MATRIX_WX_WY_F    0x38
+#define G_MWO_MATRIX_WZ_WW_F    0x3C
+#define G_MWO_POINT_RGBA        0x10
+#define G_MWO_POINT_ST          0x14
+#define G_MWO_POINT_XYSCREEN    0x18
+#define G_MWO_POINT_ZSCREEN     0x1C
+
+#ifdef DEBUG
+static const char *MWOPointText[] =
+{
+    "G_MWO_POINT_RGBA",
+    "G_MWO_POINT_ST",
+    "G_MWO_POINT_XYSCREEN",
+    "G_MWO_POINT_ZSCREEN"
+};
+
+static const char *MWOMatrixText[] =
+{
+    "G_MWO_MATRIX_XX_XY_I", "G_MWO_MATRIX_XZ_XW_I", "G_MWO_MATRIX_YX_YY_I", "G_MWO_MATRIX_YZ_YW_I",
+    "G_MWO_MATRIX_ZX_ZY_I", "G_MWO_MATRIX_ZZ_ZW_I", "G_MWO_MATRIX_WX_WY_I", "G_MWO_MATRIX_WZ_WW_I",
+    "G_MWO_MATRIX_XX_XY_F", "G_MWO_MATRIX_XZ_XW_F", "G_MWO_MATRIX_YX_YY_F", "G_MWO_MATRIX_YZ_YW_F",
+    "G_MWO_MATRIX_ZX_ZY_F", "G_MWO_MATRIX_ZZ_ZW_F", "G_MWO_MATRIX_WX_WY_F", "G_MWO_MATRIX_WZ_WW_F"
+};
+#endif
+
+// These flags change between ucodes
+extern u32 G_MTX_STACKSIZE;
+
+extern u32 G_MTX_MODELVIEW;
+extern u32 G_MTX_PROJECTION;
+extern u32 G_MTX_MUL;
+extern u32 G_MTX_LOAD;
+extern u32 G_MTX_NOPUSH;
+extern u32 G_MTX_PUSH;
+
+extern u32 G_TEXTURE_ENABLE;
+extern u32 G_SHADING_SMOOTH;
+extern u32 G_CULL_FRONT;
+extern u32 G_CULL_BACK;
+extern u32 G_CULL_BOTH;
+extern u32 G_CLIPPING;
+
+extern u32 G_MV_VIEWPORT;
+
+extern u32 G_MWO_aLIGHT_1, G_MWO_bLIGHT_1;
+extern u32 G_MWO_aLIGHT_2, G_MWO_bLIGHT_2;
+extern u32 G_MWO_aLIGHT_3, G_MWO_bLIGHT_3;
+extern u32 G_MWO_aLIGHT_4, G_MWO_bLIGHT_4;
+extern u32 G_MWO_aLIGHT_5, G_MWO_bLIGHT_5;
+extern u32 G_MWO_aLIGHT_6, G_MWO_bLIGHT_6;
+extern u32 G_MWO_aLIGHT_7, G_MWO_bLIGHT_7;
+extern u32 G_MWO_aLIGHT_8, G_MWO_bLIGHT_8;
+
+// Image formats
+#define G_IM_FMT_RGBA   0
+#define G_IM_FMT_YUV    1
+#define G_IM_FMT_CI     2
+#define G_IM_FMT_IA     3
+#define G_IM_FMT_I      4
+#define G_IM_FMT_CI_IA  5   //not real
+
+// Image sizes
+#define G_IM_SIZ_4b     0
+#define G_IM_SIZ_8b     1
+#define G_IM_SIZ_16b    2
+#define G_IM_SIZ_32b    3
+#define G_IM_SIZ_DD     5
+
+#define G_TX_MIRROR     0x1
+#define G_TX_CLAMP      0x2
+
+#ifdef DEBUG
+static const char *ImageFormatText[] =
+{
+    "G_IM_FMT_RGBA",
+    "G_IM_FMT_YUV",
+    "G_IM_FMT_CI",
+    "G_IM_FMT_IA",
+    "G_IM_FMT_I",
+    "G_IM_FMT_INVALID",
+    "G_IM_FMT_INVALID",
+    "G_IM_FMT_INVALID"
+};
+
+static const char *ImageSizeText[] =
+{
+    "G_IM_SIZ_4b",
+    "G_IM_SIZ_8b",
+    "G_IM_SIZ_16b",
+    "G_IM_SIZ_32b"
+};
+
+static const char *SegmentText[] =
+{
+    "G_MWO_SEGMENT_0", "G_MWO_SEGMENT_1", "G_MWO_SEGMENT_2", "G_MWO_SEGMENT_3",
+    "G_MWO_SEGMENT_4", "G_MWO_SEGMENT_5", "G_MWO_SEGMENT_6", "G_MWO_SEGMENT_7",
+    "G_MWO_SEGMENT_8", "G_MWO_SEGMENT_9", "G_MWO_SEGMENT_A", "G_MWO_SEGMENT_B",
+    "G_MWO_SEGMENT_C", "G_MWO_SEGMENT_D", "G_MWO_SEGMENT_E", "G_MWO_SEGMENT_F"
+};
+#endif
+
+#define G_NOOP                  0x00
+
+#define G_IMMFIRST              -65
+
+// These GBI commands are common to all ucodes
+#define G_SETCIMG               0xFF    /*  -1 */
+#define G_SETZIMG               0xFE    /*  -2 */
+#define G_SETTIMG               0xFD    /*  -3 */
+#define G_SETCOMBINE            0xFC    /*  -4 */
+#define G_SETENVCOLOR           0xFB    /*  -5 */
+#define G_SETPRIMCOLOR          0xFA    /*  -6 */
+#define G_SETBLENDCOLOR         0xF9    /*  -7 */
+#define G_SETFOGCOLOR           0xF8    /*  -8 */
+#define G_SETFILLCOLOR          0xF7    /*  -9 */
+#define G_FILLRECT              0xF6    /* -10 */
+#define G_SETTILE               0xF5    /* -11 */
+#define G_LOADTILE              0xF4    /* -12 */
+#define G_LOADBLOCK             0xF3    /* -13 */
+#define G_SETTILESIZE           0xF2    /* -14 */
+#define G_LOADTLUT              0xF0    /* -16 */
+#define G_RDPSETOTHERMODE       0xEF    /* -17 */
+#define G_SETPRIMDEPTH          0xEE    /* -18 */
+#define G_SETSCISSOR            0xED    /* -19 */
+#define G_SETCONVERT            0xEC    /* -20 */
+#define G_SETKEYR               0xEB    /* -21 */
+#define G_SETKEYGB              0xEA    /* -22 */
+#define G_RDPFULLSYNC           0xE9    /* -23 */
+#define G_RDPTILESYNC           0xE8    /* -24 */
+#define G_RDPPIPESYNC           0xE7    /* -25 */
+#define G_RDPLOADSYNC           0xE6    /* -26 */
+#define G_TEXRECTFLIP           0xE5    /* -27 */
+#define G_TEXRECT               0xE4    /* -28 */
+
+#define G_RDPNOOP               0xC0
+
+#define G_TRI_FILL              0xC8    /* fill triangle:            11001000 */
+#define G_TRI_FILL_ZBUFF        0xC9    /* fill, zbuff triangle:     11001001 */
+#define G_TRI_TXTR              0xCA    /* texture triangle:         11001010 */
+#define G_TRI_TXTR_ZBUFF        0xCB    /* texture, zbuff triangle:  11001011 */
+#define G_TRI_SHADE             0xCC    /* shade triangle:           11001100 */
+#define G_TRI_SHADE_ZBUFF       0xCD    /* shade, zbuff triangle:    11001101 */
+#define G_TRI_SHADE_TXTR        0xCE    /* shade, texture triangle:  11001110 */
+#define G_TRI_SHADE_TXTR_ZBUFF  0xCF    /* shade, txtr, zbuff trngl: 11001111 */
+
+/*
+ * G_SETOTHERMODE_L sft: shift count
+ */
+#define G_MDSFT_ALPHACOMPARE    0
+#define G_MDSFT_ZSRCSEL         2
+#define G_MDSFT_RENDERMODE      3
+#define G_MDSFT_BLENDER         16
+
+/*
+ * G_SETOTHERMODE_H sft: shift count
+ */
+#define G_MDSFT_BLENDMASK       0   /* unsupported */
+#define G_MDSFT_ALPHADITHER     4
+#define G_MDSFT_RGBDITHER       6
+
+#define G_MDSFT_COMBKEY         8
+#define G_MDSFT_TEXTCONV        9
+#define G_MDSFT_TEXTFILT        12
+#define G_MDSFT_TEXTLUT         14
+#define G_MDSFT_TEXTLOD         16
+#define G_MDSFT_TEXTDETAIL      17
+#define G_MDSFT_TEXTPERSP       19
+#define G_MDSFT_CYCLETYPE       20
+#define G_MDSFT_COLORDITHER     22  /* unsupported in HW 2.0 */
+#define G_MDSFT_PIPELINE        23
+
+/* G_SETOTHERMODE_H gPipelineMode */
+#define G_PM_1PRIMITIVE         1
+#define G_PM_NPRIMITIVE         0
+
+/* G_SETOTHERMODE_H gSetCycleType */
+#define G_CYC_1CYCLE            0
+#define G_CYC_2CYCLE            1
+#define G_CYC_COPY              2
+#define G_CYC_FILL              3
+
+/* G_SETOTHERMODE_H gSetTexturePersp */
+#define G_TP_NONE               0
+#define G_TP_PERSP              1
+
+/* G_SETOTHERMODE_H gSetTextureDetail */
+#define G_TD_CLAMP              0
+#define G_TD_SHARPEN            1
+#define G_TD_DETAIL             2
+
+/* G_SETOTHERMODE_H gSetTextureLOD */
+#define G_TL_TILE               0
+#define G_TL_LOD                1
+
+/* G_SETOTHERMODE_H gSetTextureLUT */
+#define G_TT_NONE               0
+#define G_TT_RGBA16             2
+#define G_TT_IA16               3
+
+/* G_SETOTHERMODE_H gSetTextureFilter */
+#define G_TF_POINT              0
+#define G_TF_AVERAGE            3
+#define G_TF_BILERP             2
+
+/* G_SETOTHERMODE_H gSetTextureConvert */
+#define G_TC_CONV               0
+#define G_TC_FILTCONV           5
+#define G_TC_FILT               6
+
+/* G_SETOTHERMODE_H gSetCombineKey */
+#define G_CK_NONE               0
+#define G_CK_KEY                1
+
+/* G_SETOTHERMODE_H gSetColorDither */
+#define G_CD_MAGICSQ            0
+#define G_CD_BAYER              1
+#define G_CD_NOISE              2
+
+#define G_CD_DISABLE            3
+#define G_CD_ENABLE             G_CD_NOISE  /* HW 1.0 compatibility mode */
+
+/* G_SETOTHERMODE_H gSetAlphaDither */
+#define G_AD_PATTERN            0
+#define G_AD_NOTPATTERN         1
+#define G_AD_NOISE              2
+#define G_AD_DISABLE            3
+
+/* G_SETOTHERMODE_L gSetAlphaCompare */
+#define G_AC_NONE               0
+#define G_AC_THRESHOLD          1
+#define G_AC_DITHER             3
+
+/* G_SETOTHERMODE_L gSetDepthSource */
+#define G_ZS_PIXEL              0
+#define G_ZS_PRIM               1
+
+/* G_SETOTHERMODE_L gSetRenderMode */
+#define AA_EN                   1
+#define Z_CMP                   1
+#define Z_UPD                   1
+#define IM_RD                   1
+#define CLR_ON_CVG              1
+#define CVG_DST_CLAMP           0
+#define CVG_DST_WRAP            1
+#define CVG_DST_FULL            2
+#define CVG_DST_SAVE            3
+#define ZMODE_OPA               0
+#define ZMODE_INTER             1
+#define ZMODE_XLU               2
+#define ZMODE_DEC               3
+#define CVG_X_ALPHA             1
+#define ALPHA_CVG_SEL           1
+#define FORCE_BL                1
+#define TEX_EDGE                0 // not used
+
+#define G_SC_NON_INTERLACE      0
+#define G_SC_EVEN_INTERLACE     2
+#define G_SC_ODD_INTERLACE      3
+
+#ifdef DEBUG
+static const char *AAEnableText = "AA_EN";
+static const char *DepthCompareText = "Z_CMP";
+static const char *DepthUpdateText = "Z_UPD";
+static const char *ClearOnCvgText = "CLR_ON_CVG";
+static const char *CvgXAlphaText = "CVG_X_ALPHA";
+static const char *AlphaCvgSelText = "ALPHA_CVG_SEL";
+static const char *ForceBlenderText = "FORCE_BL";
+
+static const char *AlphaCompareText[] =
+{
+    "G_AC_NONE", "G_AC_THRESHOLD", "G_AC_INVALID", "G_AC_DITHER"
+};
+
+static const char *DepthSourceText[] =
+{
+    "G_ZS_PIXEL", "G_ZS_PRIM"
+};
+
+static const char *AlphaDitherText[] =
+{
+    "G_AD_PATTERN", "G_AD_NOTPATTERN", "G_AD_NOISE", "G_AD_DISABLE"
+};
+
+static const char *ColorDitherText[] =
+{
+    "G_CD_MAGICSQ", "G_CD_BAYER", "G_CD_NOISE", "G_CD_DISABLE"
+};
+
+static const char *CombineKeyText[] =
+{
+    "G_CK_NONE", "G_CK_KEY"
+};
+
+static const char *TextureConvertText[] =
+{
+    "G_TC_CONV", "G_TC_INVALID", "G_TC_INVALID", "G_TC_INVALID", "G_TC_INVALID", "G_TC_FILTCONV", "G_TC_FILT", "G_TC_INVALID"
+};
+
+static const char *TextureFilterText[] =
+{
+    "G_TF_POINT", "G_TF_INVALID", "G_TF_BILERP", "G_TF_AVERAGE"
+};
+
+static const char *TextureLUTText[] =
+{
+    "G_TT_NONE", "G_TT_INVALID", "G_TT_RGBA16", "G_TT_IA16"
+};
+
+static const char *TextureLODText[] =
+{
+    "G_TL_TILE", "G_TL_LOD"
+};
+
+static const char *TextureDetailText[] =
+{
+    "G_TD_CLAMP", "G_TD_SHARPEN", "G_TD_DETAIL"
+};
+
+static const char *TexturePerspText[] =
+{
+    "G_TP_NONE", "G_TP_PERSP"
+};
+
+static const char *CycleTypeText[] =
+{
+    "G_CYC_1CYCLE", "G_CYC_2CYCLE", "G_CYC_COPY", "G_CYC_FILL"
+};
+
+static const char *PipelineModeText[] =
+{
+    "G_PM_NPRIMITIVE", "G_PM_1PRIMITIVE"
+};
+
+static const char *CvgDestText[] =
+{
+    "CVG_DST_CLAMP", "CVG_DST_WRAP", "CVG_DST_FULL", "CVG_DST_SAVE"
+};
+
+static const char *DepthModeText[] =
+{
+    "ZMODE_OPA", "ZMODE_INTER", "ZMODE_XLU", "ZMODE_DEC"
+};
+
+static const char *ScissorModeText[] =
+{
+    "G_SC_NON_INTERLACE", "G_SC_INVALID", "G_SC_EVEN_INTERLACE", "G_SC_ODD_INTERLACE"
+};
+#endif
+
+/* Color combiner constants: */
+#define G_CCMUX_COMBINED        0
+#define G_CCMUX_TEXEL0          1
+#define G_CCMUX_TEXEL1          2
+#define G_CCMUX_PRIMITIVE       3
+#define G_CCMUX_SHADE           4
+#define G_CCMUX_ENVIRONMENT     5
+#define G_CCMUX_CENTER          6
+#define G_CCMUX_SCALE           6
+#define G_CCMUX_COMBINED_ALPHA  7
+#define G_CCMUX_TEXEL0_ALPHA    8
+#define G_CCMUX_TEXEL1_ALPHA    9
+#define G_CCMUX_PRIMITIVE_ALPHA 10
+#define G_CCMUX_SHADE_ALPHA     11
+#define G_CCMUX_ENV_ALPHA       12
+#define G_CCMUX_LOD_FRACTION    13
+#define G_CCMUX_PRIM_LOD_FRAC   14
+#define G_CCMUX_NOISE           7
+#define G_CCMUX_K4              7
+#define G_CCMUX_K5              15
+#define G_CCMUX_1               6
+#define G_CCMUX_0               31
+
+/* Alpha combiner constants: */
+#define G_ACMUX_COMBINED        0
+#define G_ACMUX_TEXEL0          1
+#define G_ACMUX_TEXEL1          2
+#define G_ACMUX_PRIMITIVE       3
+#define G_ACMUX_SHADE           4
+#define G_ACMUX_ENVIRONMENT     5
+#define G_ACMUX_LOD_FRACTION    0
+#define G_ACMUX_PRIM_LOD_FRAC   6
+#define G_ACMUX_1               6
+#define G_ACMUX_0               7
+
+#ifdef DEBUG
+// Debug names for colour-combiner selector values 0..15 (presumably the
+// first, "sub A", RGB input - confirm against the code that prints them).
+// Entries 6 and 7 follow the constants defined in this header:
+// G_CCMUX_1 == 6 and G_CCMUX_NOISE == 7.
+static const char *saRGBText[] =
+{
+    "COMBINED",         "TEXEL0",           "TEXEL1",           "PRIMITIVE",
+    "SHADE",            "ENVIRONMENT",      "1",                "NOISE",
+    "0",                "0",                "0",                "0",
+    "0",                "0",                "0",                "0"
+};
+
+static const char *sbRGBText[] =
+{
+    "COMBINED",         "TEXEL0",           "TEXEL1",           "PRIMITIVE",
+    "SHADE",            "ENVIRONMENT",      "CENTER",           "K4",
+    "0",                "0",                "0",                "0",
+    "0",                "0",                "0",                "0"
+};
+
+static const char *mRGBText[] =
+{
+    "COMBINED",         "TEXEL0",           "TEXEL1",           "PRIMITIVE",
+    "SHADE",            "ENVIRONMENT",      "SCALE",            "COMBINED_ALPHA",
+    "TEXEL0_ALPHA",     "TEXEL1_ALPHA",     "PRIMITIVE_ALPHA",  "SHADE_ALPHA",
+    "ENV_ALPHA",        "LOD_FRACTION",     "PRIM_LOD_FRAC",    "K5",
+    "0",                "0",                "0",                "0",
+    "0",                "0",                "0",                "0",
+    "0",                "0",                "0",                "0",
+    "0",                "0",                "0",                "0"
+};
+
+static const char *aRGBText[] =
+{
+    "COMBINED",         "TEXEL0",           "TEXEL1",           "PRIMITIVE",
+    "SHADE",            "ENVIRONMENT",      "1",                "0",
+};
+
+static const char *saAText[] =
+{
+    "COMBINED",         "TEXEL0",           "TEXEL1",           "PRIMITIVE",
+    "SHADE",            "ENVIRONMENT",      "1",                "0",
+};
+
+static const char *sbAText[] =
+{
+    "COMBINED",         "TEXEL0",           "TEXEL1",           "PRIMITIVE",
+    "SHADE",            "ENVIRONMENT",      "1",                "0",
+};
+
+static const char *mAText[] =
+{
+    "LOD_FRACTION",     "TEXEL0",           "TEXEL1",           "PRIMITIVE",
+    "SHADE",            "ENVIRONMENT",      "PRIM_LOD_FRAC",    "0",
+};
+
+static const char *aAText[] =
+{
+    "COMBINED",         "TEXEL0",           "TEXEL1",           "PRIMITIVE",
+    "SHADE",            "ENVIRONMENT",      "1",                "0",
+};
+#endif
+
+extern u32 G_RDPHALF_1, G_RDPHALF_2, G_RDPHALF_CONT;
+extern u32 G_SPNOOP;
+extern u32 G_SETOTHERMODE_H, G_SETOTHERMODE_L;
+extern u32 G_DL, G_ENDDL, G_CULLDL, G_BRANCH_Z;
+extern u32 G_LOAD_UCODE;
+extern u32 G_MOVEMEM, G_MOVEWORD;
+extern u32 G_MTX, G_POPMTX;
+extern u32 G_GEOMETRYMODE, G_SETGEOMETRYMODE, G_CLEARGEOMETRYMODE;
+extern u32 G_TEXTURE;
+extern u32 G_DMA_IO, G_DMA_DL, G_DMA_TRI, G_DMA_MTX, G_DMA_VTX, G_DMA_OFFSETS;
+extern u32 G_SPECIAL_1, G_SPECIAL_2, G_SPECIAL_3;
+extern u32 G_VTX, G_MODIFYVTX, G_VTXCOLORBASE;
+extern u32 G_TRI1, G_TRI2, G_TRI4;
+extern u32 G_QUAD, G_LINE3D;
+extern u32 G_RESERVED0, G_RESERVED1, G_RESERVED2, G_RESERVED3;
+extern u32 G_SPRITE2D_BASE;
+extern u32 G_BG_1CYC, G_BG_COPY;
+extern u32 G_OBJ_RECTANGLE, G_OBJ_SPRITE, G_OBJ_MOVEMEM;
+extern u32 G_SELECT_DL, G_OBJ_RENDERMODE, G_OBJ_RECTANGLE_R;
+extern u32 G_OBJ_LOADTXTR, G_OBJ_LDTX_SPRITE, G_OBJ_LDTX_RECT, G_OBJ_LDTX_RECT_R;
+extern u32 G_RDPHALF_0, G_TRI_UNKNOWN;
+
+#define LIGHT_1 1
+#define LIGHT_2 2
+#define LIGHT_3 3
+#define LIGHT_4 4
+#define LIGHT_5 5
+#define LIGHT_6 6
+#define LIGHT_7 7
+#define LIGHT_8 8
+
+#define G_DL_PUSH       0x00
+#define G_DL_NOPUSH     0x01
+
+// Vertex record: 16-bit position, flag and texture coordinates followed by a
+// 4-byte attribute that can be read either as a colour or as a signed normal.
+// NOTE(review): the members of each pair are declared in reverse of the usual
+// order (y before x, t before s, a..r) - presumably to match how the data
+// sits in memory after word swapping; confirm against the code that reads it.
+typedef struct
+{
+    s16 y;
+    s16 x;
+
+    u16 flag;
+    s16 z;
+
+    s16 t;
+    s16 s;
+
+    // Both views share the same four bytes
+    union {
+        struct
+        {
+            u8 a;
+            u8 b;
+            u8 g;
+            u8 r;
+        } color;
+        struct
+        {
+            s8 a;
+            s8 z;   // overlays color.b
+            s8 y;   // overlays color.g
+            s8 x;   // overlays color.r
+        } normal;
+    };
+} Vertex;
+
+// Vertex record with the same leading members as Vertex, except that a u16
+// `ci` takes the place of `flag` and there is no inline colour/normal word.
+// NOTE(review): presumably the Perfect Dark (F3DPD) vertex format with `ci`
+// as a colour index - confirm against the F3DPD code.
+typedef struct
+{
+    s16 y, x;
+    u16 ci;
+    s16 z;
+    s16 t, s;
+} PDVertex;
+
+
+// Triangle record: three 8-bit values v0..v2 and a flag byte, followed by
+// one (s, t) pair per corner.
+// NOTE(review): v0..v2 are presumably vertex indices and the record is
+// presumably used by the F3DDKR microcode - confirm against F3DDKR.cpp.
+typedef struct
+{
+    u8      v2, v1, v0, flag;
+    s16     t0, s0;
+    s16     t1, s1;
+    s16     t2, s2;
+} DKRTriangle;
+
+// Light record: two r/g/b byte triples (each preceded by a pad byte) and a
+// signed 8-bit x/y/z triple.
+// NOTE(review): x/y/z is presumably the light direction and the second
+// colour triple a copy of the first - confirm against the code that loads it.
+struct Light
+{
+    u8 pad0, b, g, r;
+    u8 pad1, b2, g2, r2;
+    s8 pad2, z, y, x;
+};
+
+// Variant of Light whose last part holds 16-bit x/y/z values and a `range`
+// instead of the signed 8-bit x/y/z triple.
+// NOTE(review): which microcode uses this layout is not visible here.
+struct LightMM
+{
+    u8 pad0, b, g, r;
+    u8 pad1, b2, g2, r2;
+    s16 y, x, range, z;
+};
+
+
+// GBI commands
+typedef void (*GBIFunc)( u32 w0, u32 w1 );
+//extern GBIFunc GBICmd[256];
+
+// Entry of the special-microcode table consulted by GBI_DetectMicrocode:
+// when the microcode's identification string equals `text`, the detected
+// microcode is given this entry's `type`.
+struct SpecialMicrocodeInfo
+{
+    u32 type;           // one of the microcode type constants (F3D, F3DEX, ...)
+    u32 NoN;
+    u32 crc;
+    const char *text;   // identification string compared with strcmp
+};
+
+// One known microcode. Entries form a doubly linked list through
+// higher/lower whose ends are GBI.top and GBI.bottom; GBI_MakeCurrent moves
+// the selected entry to the top.
+struct MicrocodeInfo
+{
+    // NOTE(review): presumably the uc_start / uc_dstart / uc_dsize values
+    // passed to GBI_DetectMicrocode - confirm there.
+    u32 address, dataAddress;
+    u16 dataSize;
+    u32 type;           // microcode type constant; selects the *_Init call in GBI_MakeCurrent
+    u32 NoN;
+    u32 crc;
+    u32 *text;          // NOTE(review): u32* here, const char* in SpecialMicrocodeInfo; usage not visible here
+
+    MicrocodeInfo *higher, *lower;  // neighbours towards GBI.top / GBI.bottom; NULL at the ends
+};
+
+// State of the GBI command dispatcher (single global instance: GBI).
+struct GBIInfo
+{
+    // Handler per 8-bit opcode. GBI_MakeCurrent resets every entry to
+    // GBI_Unknown before the microcode's *_Init function registers its
+    // handlers through GBI_SetGBI.
+    GBIFunc cmd[256];
+
+    // PCStackSize is assigned by the microcode *_Init functions
+    u32 PCStackSize, numMicrocodes;
+    // Active microcode and the two ends of the microcode list
+    MicrocodeInfo *current, *top, *bottom;
+
+#ifdef PROFILE_GBI
+    // NOTE(review): 256 * 12 is presumably 256 opcodes for each of the 12
+    // microcode types (F3D .. F3DCBFD) - confirm in the profiling code.
+    unsigned int profileTimer[256 * 12];
+    unsigned int profileNum[256 * 12];
+    unsigned int profileTmp;
+#endif
+};
+
+extern GBIInfo GBI;
+
+#ifdef PROFILE_GBI
+void GBI_ProfileReset();
+void GBI_ProfileInit();
+void GBI_ProfileBegin(u32 cmd);
+void GBI_ProfileEnd(u32 cmd);
+u32  GBI_ProfilePrint(FILE *file);
+const char* GBI_GetFuncName(u32 ucode, u32 cmd);
+u32  GBI_GetFuncTime(u32 ucode, u32 cmd);
+#endif
+
+void GBI_MakeCurrent( MicrocodeInfo *current );
+MicrocodeInfo *GBI_DetectMicrocode( u32 uc_start, u32 uc_dstart, u16 uc_dsize );
+extern u32 last_good_ucode;
+void GBI_Init();
+void GBI_Destroy();
+
+// Allows easier setting of GBI commands: stores `value` in the opcode
+// variable `command`, then installs `function` as the handler for that
+// opcode in GBI.cmd.
+// The body is wrapped in do/while(0) so that the macro expands to a single
+// statement (safe under an unbraced if/else); invoke it with a trailing
+// semicolon. Arguments are parenthesised so that expressions can be passed.
+#define GBI_SetGBI( command, value, function ) \
+    do { \
+        (command) = (value); \
+        GBI.cmd[(command)] = (function); \
+    } while (0)
+
+// Loads the microcode-dependent flag values: every G_<NAME> variable listed
+// below receives <ucode>_<NAME>, so `ucode` must be a prefix (e.g. F3D) for
+// which all of these constants are defined.
+// The assignments are wrapped in do/while(0) so that the whole group behaves
+// as one statement (safe under an unbraced if/else); invoke the macro with a
+// trailing semicolon.
+#define GBI_InitFlags( ucode ) \
+    do { \
+        G_MTX_STACKSIZE     = ucode##_MTX_STACKSIZE; \
+        G_MTX_MODELVIEW     = ucode##_MTX_MODELVIEW; \
+        G_MTX_PROJECTION    = ucode##_MTX_PROJECTION; \
+        G_MTX_MUL           = ucode##_MTX_MUL; \
+        G_MTX_LOAD          = ucode##_MTX_LOAD; \
+        G_MTX_NOPUSH        = ucode##_MTX_NOPUSH; \
+        G_MTX_PUSH          = ucode##_MTX_PUSH; \
+\
+        G_TEXTURE_ENABLE    = ucode##_TEXTURE_ENABLE; \
+        G_SHADING_SMOOTH    = ucode##_SHADING_SMOOTH; \
+        G_CULL_FRONT        = ucode##_CULL_FRONT; \
+        G_CULL_BACK         = ucode##_CULL_BACK; \
+        G_CULL_BOTH         = ucode##_CULL_BOTH; \
+        G_CLIPPING          = ucode##_CLIPPING; \
+\
+        G_MV_VIEWPORT       = ucode##_MV_VIEWPORT; \
+\
+        G_MWO_aLIGHT_1      = ucode##_MWO_aLIGHT_1; \
+        G_MWO_bLIGHT_1      = ucode##_MWO_bLIGHT_1; \
+        G_MWO_aLIGHT_2      = ucode##_MWO_aLIGHT_2; \
+        G_MWO_bLIGHT_2      = ucode##_MWO_bLIGHT_2; \
+        G_MWO_aLIGHT_3      = ucode##_MWO_aLIGHT_3; \
+        G_MWO_bLIGHT_3      = ucode##_MWO_bLIGHT_3; \
+        G_MWO_aLIGHT_4      = ucode##_MWO_aLIGHT_4; \
+        G_MWO_bLIGHT_4      = ucode##_MWO_bLIGHT_4; \
+        G_MWO_aLIGHT_5      = ucode##_MWO_aLIGHT_5; \
+        G_MWO_bLIGHT_5      = ucode##_MWO_bLIGHT_5; \
+        G_MWO_aLIGHT_6      = ucode##_MWO_aLIGHT_6; \
+        G_MWO_bLIGHT_6      = ucode##_MWO_bLIGHT_6; \
+        G_MWO_aLIGHT_7      = ucode##_MWO_aLIGHT_7; \
+        G_MWO_bLIGHT_7      = ucode##_MWO_bLIGHT_7; \
+        G_MWO_aLIGHT_8      = ucode##_MWO_aLIGHT_8; \
+        G_MWO_bLIGHT_8      = ucode##_MWO_bLIGHT_8; \
+    } while (0)
+
+#endif
+
diff --git a/source/gles2n64/src/Hash.h b/source/gles2n64/src/Hash.h
new file mode 100644 (file)
index 0000000..1f26ec9
--- /dev/null
@@ -0,0 +1,42 @@
+#ifndef __HASH_H__
+#define __HASH_H__
+
+#include <stdlib.h>
+
+// Fixed-size, direct-mapped table of T pointers keyed by a hash value.
+//
+// The table has 2^power2 slots and a hash selects its slot through its low
+// bits. There is no chaining or probing: inserting a hash whose slot is
+// already taken replaces the earlier pointer, and find() returns whatever the
+// slot holds without comparing the full hash. The table stores pointers only
+// and never frees the objects they refer to.
+//
+// The class has no constructor, so init() must be called before any other
+// member is used.
+template<typename T>
+class HashMap
+{
+public:
+    // Allocates 2^power2 empty slots (power2 must be below the bit width of
+    // unsigned). If the allocation fails the map stays empty: find() returns
+    // NULL and insert() does nothing.
+    void init(unsigned power2)
+    {
+        _mask = (1u << power2) - 1;
+        _hashmap = (T**)malloc(((size_t)_mask + 1) * sizeof(T*));
+        reset();
+    }
+
+    // Releases the slot array. The pointer is cleared afterwards so that a
+    // repeated destroy() or a late find()/insert() cannot touch freed memory.
+    void destroy()
+    {
+        free(_hashmap);
+        _hashmap = NULL;
+    }
+
+    // Empties every slot. Written as a plain loop so that this header needs
+    // nothing beyond <stdlib.h>.
+    void reset()
+    {
+        if (!_hashmap)
+            return;
+
+        const size_t slots = (size_t)_mask + 1;
+        for (size_t i = 0; i < slots; ++i)
+            _hashmap[i] = NULL;
+    }
+
+    // Stores data in the slot selected by hash, replacing any previous entry.
+    void insert(unsigned hash, T* data)
+    {
+        if (_hashmap)
+            _hashmap[hash & _mask] = data;
+    }
+
+    // Returns the pointer stored in the slot selected by hash, or NULL when
+    // the slot is empty or the map has no storage.
+    T* find(unsigned hash)
+    {
+        return _hashmap ? _hashmap[hash & _mask] : NULL;
+    }
+
+protected:
+    T **_hashmap;       // slot array of _mask + 1 entries, NULL after destroy()
+    unsigned _mask;     // slot count minus one
+};
+
+#endif
diff --git a/source/gles2n64/src/L3D.cpp b/source/gles2n64/src/L3D.cpp
new file mode 100644 (file)
index 0000000..72f9fe3
--- /dev/null
@@ -0,0 +1,57 @@
+#include "gles2N64.h"
+#include "Debug.h"
+#include "F3D.h"
+#include "L3D.h"
+#include "N64.h"
+#include "RSP.h"
+#include "RDP.h"
+#include "gSP.h"
+#include "gDP.h"
+#include "GBI.h"
+
+// Line3D command handler. All parameters come from w1: bits 0-7 select
+// between the plain and the "W" line call and are passed on to the latter,
+// bits 16-23 and 8-15 are each divided by 10, and bits 24-31 are passed on
+// as the last argument. w0 is not used.
+void L3D_Line3D( u32 w0, u32 w1 )
+{
+    const u32 low    = _SHIFTR( w1, 0, 8 );
+    const u32 first  = _SHIFTR( w1, 16, 8 ) / 10;
+    const u32 second = _SHIFTR( w1, 8, 8 ) / 10;
+    const u32 last   = _SHIFTR( w1, 24, 8 );
+
+    if (low != 0)
+        gSPLineW3D( first, second, low, last );
+    else
+        gSPLine3D( first, second, last );
+}
+
+// Sets up the command table for the Line3D microcode: loads the F3D flag
+// values, registers the F3D handlers under the F3D opcodes and registers
+// L3D_Line3D under L3D_LINE3D. The triangle commands (G_TRI1, G_TRI4) are
+// deliberately left unregistered - their lines are commented out below.
+void L3D_Init()
+{
+    // Set GeometryMode flags
+    GBI_InitFlags( F3D );
+
+    GBI.PCStackSize = 10;
+
+    //          GBI Command             Command Value           Command Function
+    GBI_SetGBI( G_SPNOOP,               F3D_SPNOOP,             F3D_SPNoOp );
+    GBI_SetGBI( G_MTX,                  F3D_MTX,                F3D_Mtx );
+    GBI_SetGBI( G_RESERVED0,            F3D_RESERVED0,          F3D_Reserved0 );
+    GBI_SetGBI( G_MOVEMEM,              F3D_MOVEMEM,            F3D_MoveMem );
+    GBI_SetGBI( G_VTX,                  F3D_VTX,                F3D_Vtx );
+    GBI_SetGBI( G_RESERVED1,            F3D_RESERVED1,          F3D_Reserved1 );
+    GBI_SetGBI( G_DL,                   F3D_DL,                 F3D_DList );
+    GBI_SetGBI( G_RESERVED2,            F3D_RESERVED2,          F3D_Reserved2 );
+    GBI_SetGBI( G_RESERVED3,            F3D_RESERVED3,          F3D_Reserved3 );
+    GBI_SetGBI( G_SPRITE2D_BASE,        F3D_SPRITE2D_BASE,      F3D_Sprite2D_Base );
+
+//  GBI_SetGBI( G_TRI1,                 F3D_TRI1,               F3D_Tri1 );
+    GBI_SetGBI( G_CULLDL,               F3D_CULLDL,             F3D_CullDL );
+    GBI_SetGBI( G_POPMTX,               F3D_POPMTX,             F3D_PopMtx );
+    GBI_SetGBI( G_MOVEWORD,             F3D_MOVEWORD,           F3D_MoveWord );
+    GBI_SetGBI( G_TEXTURE,              F3D_TEXTURE,            F3D_Texture );
+    GBI_SetGBI( G_SETOTHERMODE_H,       F3D_SETOTHERMODE_H,     F3D_SetOtherMode_H );
+    GBI_SetGBI( G_SETOTHERMODE_L,       F3D_SETOTHERMODE_L,     F3D_SetOtherMode_L );
+    GBI_SetGBI( G_ENDDL,                F3D_ENDDL,              F3D_EndDL );
+    GBI_SetGBI( G_SETGEOMETRYMODE,      F3D_SETGEOMETRYMODE,    F3D_SetGeometryMode );
+    GBI_SetGBI( G_CLEARGEOMETRYMODE,    F3D_CLEARGEOMETRYMODE,  F3D_ClearGeometryMode );
+    // The line command is the only handler specific to this microcode
+    GBI_SetGBI( G_LINE3D,               L3D_LINE3D,             L3D_Line3D );
+    GBI_SetGBI( G_RDPHALF_1,            F3D_RDPHALF_1,          F3D_RDPHalf_1 );
+    GBI_SetGBI( G_RDPHALF_2,            F3D_RDPHALF_2,          F3D_RDPHalf_2 );
+    GBI_SetGBI( G_RDPHALF_CONT,         F3D_RDPHALF_CONT,       F3D_RDPHalf_Cont );
+//  GBI_SetGBI( G_TRI4,                 F3D_TRI4,               F3D_Tri4 );
+}
+
diff --git a/source/gles2n64/src/L3D.h b/source/gles2n64/src/L3D.h
new file mode 100644 (file)
index 0000000..87f3b4f
--- /dev/null
@@ -0,0 +1,10 @@
+#ifndef L3D_H
+#define L3D_H
+#include "Types.h"
+
+#define L3D_LINE3D              0xB5
+
+void L3D_Line3D( u32 w0, u32 w1 );
+void L3D_Init();
+#endif
+
diff --git a/source/gles2n64/src/L3DEX.cpp b/source/gles2n64/src/L3DEX.cpp
new file mode 100644 (file)
index 0000000..2774c92
--- /dev/null
@@ -0,0 +1,61 @@
+#include "gles2N64.h"
+#include "Debug.h"
+#include "F3D.h"
+#include "F3DEX.h"
+#include "L3D.h"
+#include "L3DEX.h"
+#include "N64.h"
+#include "RSP.h"
+#include "RDP.h"
+#include "gSP.h"
+#include "gDP.h"
+#include "GBI.h"
+
+// Line3D command handler of the L3DEX microcode. All parameters come from
+// w1: bits 0-7 select between the plain and the "W" line call and are passed
+// on to the latter, and the 7-bit fields at bits 17-23 and 9-15 are passed on
+// unscaled. The last argument of both calls is always 0. w0 is not used.
+void L3DEX_Line3D( u32 w0, u32 w1 )
+{
+    const u32 low    = _SHIFTR( w1, 0, 8 );
+    const u32 first  = _SHIFTR( w1, 17, 7 );
+    const u32 second = _SHIFTR( w1, 9, 7 );
+
+    if (low != 0)
+        gSPLineW3D( first, second, low, 0 );
+    else
+        gSPLine3D( first, second, 0 );
+}
+
+// Sets up the command table for the L3DEX microcode: loads the F3DEX flag
+// values and registers a mix of F3D and F3DEX handlers, with L3DEX_Line3D
+// under L3D_LINE3D. The triangle commands (G_TRI1, G_TRI2) are deliberately
+// left unregistered - their lines are commented out below.
+void L3DEX_Init()
+{
+    // Set GeometryMode flags
+    GBI_InitFlags( F3DEX );
+
+    GBI.PCStackSize = 18;
+
+    //          GBI Command             Command Value           Command Function
+    GBI_SetGBI( G_SPNOOP,               F3D_SPNOOP,             F3D_SPNoOp );
+    GBI_SetGBI( G_MTX,                  F3D_MTX,                F3D_Mtx );
+    GBI_SetGBI( G_RESERVED0,            F3D_RESERVED0,          F3D_Reserved0 );
+    GBI_SetGBI( G_MOVEMEM,              F3D_MOVEMEM,            F3D_MoveMem );
+    GBI_SetGBI( G_VTX,                  F3D_VTX,                F3DEX_Vtx );
+    GBI_SetGBI( G_RESERVED1,            F3D_RESERVED1,          F3D_Reserved1 );
+    GBI_SetGBI( G_DL,                   F3D_DL,                 F3D_DList );
+    GBI_SetGBI( G_RESERVED2,            F3D_RESERVED2,          F3D_Reserved2 );
+    GBI_SetGBI( G_RESERVED3,            F3D_RESERVED3,          F3D_Reserved3 );
+    GBI_SetGBI( G_SPRITE2D_BASE,        F3D_SPRITE2D_BASE,      F3D_Sprite2D_Base );
+
+//  GBI_SetGBI( G_TRI1,                 F3D_TRI1,               F3DEX_Tri1 );
+    GBI_SetGBI( G_CULLDL,               F3D_CULLDL,             F3DEX_CullDL );
+    GBI_SetGBI( G_POPMTX,               F3D_POPMTX,             F3D_PopMtx );
+    GBI_SetGBI( G_MOVEWORD,             F3D_MOVEWORD,           F3D_MoveWord );
+    GBI_SetGBI( G_TEXTURE,              F3D_TEXTURE,            F3D_Texture );
+    GBI_SetGBI( G_SETOTHERMODE_H,       F3D_SETOTHERMODE_H,     F3D_SetOtherMode_H );
+    GBI_SetGBI( G_SETOTHERMODE_L,       F3D_SETOTHERMODE_L,     F3D_SetOtherMode_L );
+    GBI_SetGBI( G_ENDDL,                F3D_ENDDL,              F3D_EndDL );
+    GBI_SetGBI( G_SETGEOMETRYMODE,      F3D_SETGEOMETRYMODE,    F3D_SetGeometryMode );
+    GBI_SetGBI( G_CLEARGEOMETRYMODE,    F3D_CLEARGEOMETRYMODE,  F3D_ClearGeometryMode );
+    // Same opcode as in the L3D microcode, handled by the L3DEX decoder
+    GBI_SetGBI( G_LINE3D,               L3D_LINE3D,             L3DEX_Line3D );
+    GBI_SetGBI( G_RDPHALF_1,            F3D_RDPHALF_1,          F3D_RDPHalf_1 );
+    GBI_SetGBI( G_RDPHALF_2,            F3D_RDPHALF_2,          F3D_RDPHalf_2 );
+    GBI_SetGBI( G_MODIFYVTX,            F3DEX_MODIFYVTX,        F3DEX_ModifyVtx );
+//  GBI_SetGBI( G_TRI2,                 F3DEX_TRI2,             F3DEX_Tri2 );
+    GBI_SetGBI( G_BRANCH_Z,             F3DEX_BRANCH_Z,         F3DEX_Branch_Z );
+    GBI_SetGBI( G_LOAD_UCODE,           F3DEX_LOAD_UCODE,       F3DEX_Load_uCode );
+}
+
diff --git a/source/gles2n64/src/L3DEX.h b/source/gles2n64/src/L3DEX.h
new file mode 100644 (file)
index 0000000..ddcfb41
--- /dev/null
@@ -0,0 +1,8 @@
+#ifndef L3DEX_H
+#define L3DEX_H
+#include "Types.h"
+
+void L3DEX_Line3D( u32 w0, u32 w1 );
+void L3DEX_Init();
+#endif
+
diff --git a/source/gles2n64/src/L3DEX2.cpp b/source/gles2n64/src/L3DEX2.cpp
new file mode 100644 (file)
index 0000000..4194a8b
--- /dev/null
@@ -0,0 +1,61 @@
+#include "gles2N64.h"
+#include "Debug.h"
+#include "F3D.h"
+#include "F3DEX.h"
+#include "F3DEX2.h"
+#include "L3DEX2.h"
+#include "N64.h"
+#include "RSP.h"
+#include "RDP.h"
+#include "gSP.h"
+#include "gDP.h"
+#include "GBI.h"
+
+void L3DEX2_Line3D( u32 w0, u32 w1 )
+{
+    u32 wd = _SHIFTR( w0, 0, 8 );
+
+    if (wd == 0)
+        gSPLine3D( _SHIFTR( w0, 17, 7 ), _SHIFTR( w0, 9, 7 ), 0 );
+    else
+        gSPLineW3D( _SHIFTR( w0, 17, 7 ), _SHIFTR( w0, 9, 7 ), wd, 0 );
+}
+
+void L3DEX2_Init() // Sets up the command table for the L3DEX2 microcode through GBI_InitFlags/GBI_SetGBI.
+{
+    // Set GeometryMode flags (the F3DEX2 flag set is used for this microcode)
+    GBI_InitFlags( F3DEX2 );
+
+    GBI.PCStackSize = 18; // NOTE(review): presumably the display-list call-stack depth -- confirm where PCStackSize is read
+
+    // GBI Command                      Command Value               Command Function
+//  GBI_SetGBI( G_BG_COPY,              0x0A,                       S2DEX_BG_Copy );
+    GBI_SetGBI( G_RDPHALF_2,            F3DEX2_RDPHALF_2,           F3D_RDPHalf_2 );
+    GBI_SetGBI( G_SETOTHERMODE_H,       F3DEX2_SETOTHERMODE_H,      F3DEX2_SetOtherMode_H );
+    GBI_SetGBI( G_SETOTHERMODE_L,       F3DEX2_SETOTHERMODE_L,      F3DEX2_SetOtherMode_L );
+    GBI_SetGBI( G_RDPHALF_1,            F3DEX2_RDPHALF_1,           F3D_RDPHalf_1 );
+    GBI_SetGBI( G_SPNOOP,               F3DEX2_SPNOOP,              F3D_SPNoOp );
+    GBI_SetGBI( G_ENDDL,                F3DEX2_ENDDL,               F3D_EndDL );
+    GBI_SetGBI( G_DL,                   F3DEX2_DL,                  F3D_DList );
+    GBI_SetGBI( G_LOAD_UCODE,           F3DEX2_LOAD_UCODE,          F3DEX_Load_uCode );
+    GBI_SetGBI( G_MOVEMEM,              F3DEX2_MOVEMEM,             F3DEX2_MoveMem );
+    GBI_SetGBI( G_MOVEWORD,             F3DEX2_MOVEWORD,            F3DEX2_MoveWord );
+    GBI_SetGBI( G_MTX,                  F3DEX2_MTX,                 F3DEX2_Mtx );
+    GBI_SetGBI( G_GEOMETRYMODE,         F3DEX2_GEOMETRYMODE,        F3DEX2_GeometryMode );
+    GBI_SetGBI( G_POPMTX,               F3DEX2_POPMTX,              F3DEX2_PopMtx );
+    GBI_SetGBI( G_TEXTURE,              F3DEX2_TEXTURE,             F3DEX2_Texture );
+    GBI_SetGBI( G_DMA_IO,               F3DEX2_DMA_IO,              F3DEX2_DMAIO );
+    GBI_SetGBI( G_SPECIAL_1,            F3DEX2_SPECIAL_1,           F3DEX2_Special_1 );
+    GBI_SetGBI( G_SPECIAL_2,            F3DEX2_SPECIAL_2,           F3DEX2_Special_2 );
+    GBI_SetGBI( G_SPECIAL_3,            F3DEX2_SPECIAL_3,           F3DEX2_Special_3 );
+    // Geometry commands: the triangle/quad entries are disabled; only G_LINE3D uses a handler from this file.
+    GBI_SetGBI( G_VTX,                  F3DEX2_VTX,                 F3DEX2_Vtx );
+    GBI_SetGBI( G_MODIFYVTX,            F3DEX2_MODIFYVTX,           F3DEX_ModifyVtx );
+    GBI_SetGBI( G_CULLDL,               F3DEX2_CULLDL,              F3DEX_CullDL );
+    GBI_SetGBI( G_BRANCH_Z,             F3DEX2_BRANCH_Z,            F3DEX_Branch_Z );
+//  GBI_SetGBI( G_TRI1,                 F3DEX2_TRI1,                F3DEX2_Tri1 );
+//  GBI_SetGBI( G_TRI2,                 F3DEX2_TRI2,                F3DEX_Tri2 );
+//  GBI_SetGBI( G_QUAD,                 F3DEX2_QUAD,                F3DEX2_Quad );
+    GBI_SetGBI( G_LINE3D,               L3DEX2_LINE3D,              L3DEX2_Line3D );
+}
+
diff --git a/source/gles2n64/src/L3DEX2.h b/source/gles2n64/src/L3DEX2.h
new file mode 100644 (file)
index 0000000..dde6952
--- /dev/null
@@ -0,0 +1,10 @@
+#ifndef L3DEX2_H
+#define L3DEX2_H
+#include "Types.h"
+// Entry points and the command value of the L3DEX2 microcode module.
+#define L3DEX2_LINE3D               0x08  // command value under which L3DEX2_Init registers L3DEX2_Line3D
+
+void L3DEX2_Line3D( u32 w0, u32 w1 ); // G_LINE3D handler; only w0 is read
+void L3DEX2_Init(); // registers the L3DEX2 command handlers
+#endif // L3DEX2_H
+
diff --git a/source/gles2n64/src/N64.cpp b/source/gles2n64/src/N64.cpp
new file mode 100644 (file)
index 0000000..0eb902c
--- /dev/null
@@ -0,0 +1,11 @@
+#include "N64.h"
+#include "Types.h"
+
+u8 *DMEM;       // presumably the emulated RSP data memory; nothing in this file assigns it
+u8 *IMEM;       // presumably the emulated RSP instruction memory; nothing in this file assigns it
+u64 TMEM[512];  // 512 64-bit words owned by the plugin (presumably the RDP texture memory)
+u8 *RDRAM;      // presumably the emulated main memory; nothing in this file assigns it
+u32 RDRAMSize;  // NOTE(review): unit and exact meaning (size vs. last valid offset) not visible here -- confirm at the writer
+// Pointers to the emulated hardware registers (see N64Regs in N64.h).
+N64Regs REG;
+
diff --git a/source/gles2n64/src/N64.h b/source/gles2n64/src/N64.h
new file mode 100644 (file)
index 0000000..c56a2c8
--- /dev/null
@@ -0,0 +1,46 @@
+#ifndef N64_H
+#define N64_H
+
+#include "Types.h"
+// Interrupt flag masks (presumably for the word REG.MI_INTR points to -- confirm at the users).
+#define MI_INTR_SP      0x1        // Bit 0: SP intr
+#define MI_INTR_DP      0x20        // Bit 5: DP intr
+// Pointers to emulated hardware registers; every member is a u32 pointer, so values are read through it.
+struct N64Regs
+{
+    u32 *MI_INTR;       // interrupt flags; see the MI_INTR_* masks above
+    // DPC_* group (presumably the RDP command interface registers)
+    u32 *DPC_START;
+    u32 *DPC_END;
+    u32 *DPC_CURRENT;
+    u32 *DPC_STATUS;
+    u32 *DPC_CLOCK;
+    u32 *DPC_BUFBUSY;
+    u32 *DPC_PIPEBUSY;
+    u32 *DPC_TMEM;
+    // VI_* group (presumably the video interface registers)
+    u32 *VI_STATUS;
+    u32 *VI_ORIGIN;
+    u32 *VI_WIDTH;
+    u32 *VI_INTR;
+    u32 *VI_V_CURRENT_LINE;
+    u32 *VI_TIMING;
+    u32 *VI_V_SYNC;
+    u32 *VI_H_SYNC;
+    u32 *VI_LEAP;
+    u32 *VI_H_START;
+    u32 *VI_V_START;
+    u32 *VI_V_BURST;
+    u32 *VI_X_SCALE;
+    u32 *VI_Y_SCALE;
+};
+// Globals defined in N64.cpp.
+extern N64Regs REG;
+extern u8 *DMEM;
+extern u8 *IMEM;
+extern u8 *RDRAM;
+extern u64 TMEM[512];
+extern u32 RDRAMSize;
+
+#endif // N64_H
+
diff --git a/source/gles2n64/src/OpenGL.cpp b/source/gles2n64/src/OpenGL.cpp
new file mode 100755 (executable)
index 0000000..be71371
--- /dev/null
@@ -0,0 +1,1361 @@
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+//// paulscode, added for SDL linkage:
+#ifdef USE_SDL
+    #include <SDL.h>
+     // TODO: Remove this bandaid for SDL 2.0 compatibility (needed for SDL_SetVideoMode)
+    #if SDL_VERSION_ATLEAST(2,0,0)
+    #include "sdl2_compat.h" // Slightly hacked version of core/vidext_sdl2_compat.h
+    #endif
+       #include "eglport.h"
+#endif
+////
+
+#include "Common.h"
+#include "gles2N64.h"
+#include "OpenGL.h"
+#include "Types.h"
+#include "N64.h"
+#include "gSP.h"
+#include "gDP.h"
+#include "Textures.h"
+#include "ShaderCombiner.h"
+#include "VI.h"
+#include "RSP.h"
+#include "Config.h"
+#include "ticks.h"
+
+#include "FrameSkipper.h"
+
+//#include "ae_bridge.h"
+
+//// paulscode, function prototype missing from Yongzh's code
+void OGL_UpdateDepthUpdate();
+////
+
+#ifdef TEXTURECACHE_TEST
+int     TextureCacheTime = 0;
+#endif
+
+
+#ifdef RENDERSTATE_TEST
+int     StateChanges = 0;
+#endif
+
+#ifdef SHADER_TEST
+int     ProgramSwaps = 0;
+#endif
+
+#ifdef BATCH_TEST
+int     TotalDrawTime = 0;
+int     TotalTriangles = 0;
+int     TotalDrawCalls = 0;
+#define glDrawElements(A,B,C,D) \
+    TotalTriangles += B; TotalDrawCalls++; int t = ticksGetTicks(); glDrawElements(A,B,C,D); TotalDrawTime += (ticksGetTicks() - t);
+#define glDrawArrays(A,B,C) \
+    TotalTriangles += C; TotalDrawCalls++; int t = ticksGetTicks(); glDrawArrays(A,B,C); TotalDrawTime += (ticksGetTicks() - t);
+
+#endif
+
+GLInfo OGL;
+
+// GLSL source of the default vertex shader: takes a 2D position and a texture
+// coordinate and forwards both unchanged (z = 0, w = 1).
+const char _default_vsh[] =
+    "                           \n\t"
+    "attribute highp vec2 aPosition;                        \n\t"
+    "attribute highp vec2 aTexCoord;                        \n\t"
+    "varying mediump vec2 vTexCoord;                        \n\t"
+    "void main(){                                           \n\t"
+    "gl_Position = vec4(aPosition.x, aPosition.y, 0.0, 1.0);\n\t"
+    "vTexCoord = aTexCoord;                                 \n\t"
+    "}                                                      \n\t";
+
+// GLSL source of the default fragment shader: outputs the texel sampled from
+// uTex at the interpolated texture coordinate.
+const char _default_fsh[] =
+    "                           \n\t"
+    "uniform sampler2D uTex;                                \n\t"
+    "varying mediump vec2 vTexCoord;                        \n\t"
+    "void main(){                                           \n\t"
+    "gl_FragColor = texture2D(uTex, vTexCoord);             \n\t"
+    "}                                                      \n\t";
+
+void OGL_EnableRunfast()
+{
+#ifdef ARM_ASM
+       static const unsigned int x = 0x04086060;
+       static const unsigned int y = 0x03000000;
+       int r;
+       asm volatile (
+               "fmrx   %0, fpscr                       \n\t"   //r0 = FPSCR
+               "and    %0, %0, %1                      \n\t"   //r0 = r0 & 0x04086060
+               "orr    %0, %0, %2                      \n\t"   //r0 = r0 | 0x03000000
+               "fmxr   fpscr, %0                       \n\t"   //FPSCR = r0
+               : "=r"(r)
+               : "r"(x), "r"(y)
+       );
+#endif
+}
+
+int OGL_IsExtSupported( const char *extension )
+{
+       const GLubyte *extensions = NULL;
+       const GLubyte *start;
+       GLubyte *where, *terminator;
+
+       where = (GLubyte *) strchr(extension, ' ');
+       if (where || *extension == '\0')
+               return 0;
+
+       extensions = glGetString(GL_EXTENSIONS);
+
+    if (!extensions) return 0;
+
+       start = extensions;
+       for (;;)
+       {
+               where = (GLubyte *) strstr((const char *) start, extension);
+               if (!where)
+                       break;
+
+               terminator = where + strlen(extension);
+               if (where == start || *(where - 1) == ' ')
+                       if (*terminator == ' ' || *terminator == '\0')
+                               return 1;
+
+               start = terminator;
+       }
+
+       return 0;
+}
+
+extern void _glcompiler_error(GLint shader);
+
+void OGL_InitStates()
+{
+    GLint   success;
+
+    glEnable( GL_CULL_FACE );
+    glEnableVertexAttribArray( SC_POSITION );
+    glEnable( GL_DEPTH_TEST );
+    glDepthFunc( GL_ALWAYS );
+    glDepthMask( GL_FALSE );
+    glEnable( GL_SCISSOR_TEST );
+
+///// paulscode, fixes missing graphics on Qualcomm, Adreno:
+    glDepthRangef(0.0f, 1.0f);
+
+    // default values (only seem to work on OMAP!)
+    glPolygonOffset(0.2f, 0.2f);
+
+    //// paulscode, added for different configurations based on hardware
+    // (part of the missing shadows and stars bug fix)
+/*    int hardwareType = Android_JNI_GetHardwareType();
+    float f1, f2;
+    Android_JNI_GetPolygonOffset(hardwareType, 1, &f1, &f2);
+    glPolygonOffset( f1, f2 );
+*/    ////
+
+// some other settings that have been tried, which do not work:
+    //glDepthRangef(1.0f, 0.0f);  // reverses depth-order on OMAP3 chipsets
+    //glPolygonOffset(-0.2f, -0.2f);
+    //glDepthRangef( 0.09f, (float)0x7FFF );  // should work, but not on Adreno
+    //glPolygonOffset( -0.2f, 0.2f );
+    //glDepthRangef(0.0f, (float)0x7FFF);  // what Yongzh used, broken on Adreno
+    //glPolygonOffset(0.2f, 0.2f);
+/////
+    
+
+    glViewport(config.framebuffer.xpos, config.framebuffer.ypos, config.framebuffer.width, config.framebuffer.height);
+
+    //create default shader program
+    LOG( LOG_VERBOSE, "Generate Default Shader Program.\n" );
+
+    const char *src[1];
+    src[0] = _default_fsh;
+    OGL.defaultFragShader = glCreateShader( GL_FRAGMENT_SHADER );
+    glShaderSource( OGL.defaultFragShader, 1, (const char**) src, NULL );
+    glCompileShader( OGL.defaultFragShader );
+    glGetShaderiv( OGL.defaultFragShader, GL_COMPILE_STATUS, &success );
+    if (!success)
+    {
+        LOG(LOG_ERROR, "Failed to produce default fragment shader.\n");
+    }
+
+    src[0] = _default_vsh;
+    OGL.defaultVertShader = glCreateShader( GL_VERTEX_SHADER );
+    glShaderSource( OGL.defaultVertShader, 1, (const char**) src, NULL );
+    glCompileShader( OGL.defaultVertShader );
+    glGetShaderiv( OGL.defaultVertShader, GL_COMPILE_STATUS, &success );
+    if( !success )
+    {
+        LOG( LOG_ERROR, "Failed to produce default vertex shader.\n" );
+        _glcompiler_error( OGL.defaultVertShader );
+    }
+
+    OGL.defaultProgram = glCreateProgram();
+    glBindAttribLocation( OGL.defaultProgram, 0, "aPosition" );
+    glBindAttribLocation( OGL.defaultProgram, 1, "aTexCoord" );
+    glAttachShader( OGL.defaultProgram, OGL.defaultFragShader );
+    glAttachShader( OGL.defaultProgram, OGL.defaultVertShader );
+    glLinkProgram( OGL.defaultProgram );
+    glGetProgramiv( OGL.defaultProgram, GL_LINK_STATUS, &success );
+    if( !success )
+    {
+        LOG( LOG_ERROR, "Failed to link default program.\n" );
+        _glcompiler_error( OGL.defaultFragShader );
+    }
+    glUniform1i( glGetUniformLocation( OGL.defaultProgram, "uTex" ), 0 );
+    glUseProgram( OGL.defaultProgram );
+
+}
+
+// Recomputes OGL.scaleX / OGL.scaleY as the ratio between the configured
+// framebuffer size and the VI size.
+void OGL_UpdateScale()
+{
+    const float fbWidth  = (float)config.framebuffer.width;
+    const float fbHeight = (float)config.framebuffer.height;
+
+    OGL.scaleX = fbWidth  / (float)VI.width;
+    OGL.scaleY = fbHeight / (float)VI.height;
+}
+
+void OGL_ResizeWindow(int x, int y, int width, int height)
+{
+    config.window.xpos = x;
+    config.window.ypos = y;
+    config.window.width = width;
+    config.window.height = height;
+
+    config.framebuffer.xpos = x;
+    config.framebuffer.ypos = y;
+    config.framebuffer.width = width;
+    config.framebuffer.height = height;
+    OGL_UpdateScale();
+
+    glViewport(config.framebuffer.xpos, config.framebuffer.ypos,
+            config.framebuffer.width, config.framebuffer.height);
+}
+
+////// paulscode, added for SDL linkage
+#ifdef USE_SDL
+bool OGL_SDL_Start() // Sets the SDL video mode, computes the output rectangle into config and calls EGL_Open; FALSE on SDL failure.
+{
+    /* Initialize only SDL's video subsystem */
+    LOG(LOG_MINIMAL, "Initializing SDL video subsystem...\n" );
+    if (SDL_InitSubSystem( SDL_INIT_VIDEO ) == -1)
+    {
+         LOG(LOG_ERROR, "Error initializing SDL video subsystem: %s\n", SDL_GetError() );
+        return FALSE;
+    }
+/*SEB*
+    int current_w = config.window.width;
+    int current_h = config.window.height;
+*/
+    int current_w = 800; // fixed size replacing the configured window width (disabled code above)
+    int current_h = 480; // fixed size replacing the configured window height
+    /* Set the video mode */
+    LOG(LOG_MINIMAL, "Setting video mode %dx%d...\n", current_w, current_h );
+
+// TODO: I should actually check what the pixelformat is, rather than assuming 16 bpp (RGB_565) or 32 bpp (RGBA_8888):
+//// paulscode, added for switching between modes RGBA8888 and RGB565
+// (part of the color banding fix)
+int bitsPP;
+/*if( Android_JNI_UseRGBA8888() )
+    bitsPP = 32;
+else*/
+    bitsPP = 16; // always 16 bpp here; the 32 bpp branch above is disabled
+////
+
+    // TODO: Replace SDL_SetVideoMode with something that is SDL 2.0 compatible
+    //       Better yet, eliminate all SDL calls by using the Mupen64Plus core api
+    if (!(OGL.hScreen = SDL_SetVideoMode( current_w, current_h, bitsPP, SDL_HWSURFACE )))
+    {
+        LOG(LOG_ERROR, "Problem setting videomode %dx%d: %s\n", current_w, current_h, SDL_GetError() );
+        SDL_QuitSubSystem( SDL_INIT_VIDEO ); // undo the subsystem init before bailing out
+        return FALSE;
+    }
+
+//// paulscode, fixes the screen-size problem
+    const float ratio = ( config.romPAL ? 9.0f/11.0f : 0.75f ); // height/width ratio of the picture: 9/11 for PAL ROMs, 3/4 otherwise
+    int videoWidth = config.window.refwidth;
+    int videoHeight = config.window.refheight;
+    int x = 0;
+    int y = 0;
+    // The per-rom rescale is disabled: the expressions inside the comments below are not evaluated.
+    //re-scale width and height on per-rom basis
+    float width = /*(float)videoWidth * (float)config.window.refwidth /*/ 800.f;
+    float height = /*(float)videoHeight * (float)config.window.refheight / */480.f;
+    // Without stretching: fit the picture by height first, then clamp to the available width.
+   if (!config.stretchVideo) {
+/*     if ((float)videoWith*480.0f/(float)videoHeight/800.0f>1.0f) {
+               //scale by Width
+       } else {
+               //scale by Height
+       }*/
+        videoWidth = (int) (height / ratio); // NOTE(review): videoHeight keeps config.window.refheight unless the clamp below fires -- confirm that is intended
+        if (videoWidth > width) {
+            videoWidth = width;
+            videoHeight = (int) (width * ratio);
+        }
+    } else {
+       videoWidth=800;
+       videoHeight=480;
+    }
+    x = (width - videoWidth) / 2;   // centre the picture horizontally
+    y = (height - videoHeight) / 2; // centre the picture vertically
+    // window and framebuffer share the same rectangle
+    //set xpos and ypos
+    config.window.xpos = x;
+    config.window.ypos = y;
+    config.framebuffer.xpos = x;
+    config.framebuffer.ypos = y;
+    
+    //set width and height
+    config.window.width = (int)videoWidth;
+    config.window.height = (int)videoHeight;
+    config.framebuffer.width = (int)videoWidth;
+    config.framebuffer.height = (int)videoHeight;
+       // NOTE(review): the result of EGL_Open is not checked here
+       EGL_Open(800, 480);
+////
+    return true;
+}
+#endif
+//////
+
+#ifdef USE_SDL
+void Android_JNI_SwapWindow() // Keeps the upstream function name; in this port it only forwards to EGL_SwapBuffers().
+{
+       EGL_SwapBuffers();
+}
+#endif
+
+
+bool OGL_Start()
+{
+// paulscode, initialize SDL
+#ifdef USE_SDL
+    if (!OGL_SDL_Start())
+        return false;
+#endif
+//
+
+    OGL_InitStates();
+
+#ifdef USE_SDL
+/////// paulscode, graphics bug-fixes
+    float depth = gDP.fillColor.z ;
+    glDisable( GL_SCISSOR_TEST );
+    glDepthMask( GL_TRUE );  // fixes side-bar graphics glitches
+//    glClearDepthf( depth );  // broken on Qualcomm Adreno
+    glClearDepthf( 1.0f );  // fixes missing graphics on Qualcomm Adreno
+    glClearColor( 0, 0, 0, 1 );
+    glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );
+    glFinish();
+    Android_JNI_SwapWindow();  // paulscode, fix for black-screen bug
+    glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );
+    glFinish();
+    OGL_UpdateDepthUpdate();
+    glEnable( GL_SCISSOR_TEST );
+////////
+#endif
+
+    //create framebuffer
+    if (config.framebuffer.enable)
+    {
+        LOG(LOG_VERBOSE, "Create offscreen framebuffer. \n");
+        if (config.framebuffer.width == config.window.width && config.framebuffer.height == config.window.height)
+        {
+            LOG(LOG_WARNING, "There's no point in using a offscreen framebuffer when the window and screen dimensions are the same\n");
+        }
+
+        glGenFramebuffers(1, &OGL.framebuffer.fb);
+        glGenRenderbuffers(1, &OGL.framebuffer.depth_buffer);
+        glGenTextures(1, &OGL.framebuffer.color_buffer);
+        glBindRenderbuffer(GL_RENDERBUFFER, OGL.framebuffer.depth_buffer);
+        glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT24_OES, config.framebuffer.width, config.framebuffer.height);
+        glBindTexture(GL_TEXTURE_2D, OGL.framebuffer.color_buffer);
+        glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, config.framebuffer.width, config.framebuffer.height, 0, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, NULL);
+        glBindFramebuffer(GL_FRAMEBUFFER, OGL.framebuffer.fb);
+        glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, OGL.framebuffer.color_buffer, 0);
+        glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, OGL.framebuffer.depth_buffer);
+
+        glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
+
+        if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE)
+        {
+            LOG(LOG_ERROR, "Incomplete Framebuffer Object: ");
+            switch(glCheckFramebufferStatus(GL_FRAMEBUFFER))
+            {
+                case GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT:
+                    printf("Incomplete Attachment. \n"); break;
+                case GL_FRAMEBUFFER_UNSUPPORTED:
+                    printf("Framebuffer Unsupported. \n"); break;
+                case GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS:
+                    printf("Incomplete Dimensions. \n"); break;
+            }
+            config.framebuffer.enable = 0;
+            glBindFramebuffer(GL_FRAMEBUFFER, 0);
+        }
+    }
+
+    //check extensions
+    if ((config.texture.maxAnisotropy>0) && !OGL_IsExtSupported("GL_EXT_texture_filter_anistropic"))
+    {
+        LOG(LOG_WARNING, "Anistropic Filtering is not supported.\n");
+        config.texture.maxAnisotropy = 0;
+    }
+
+    float f = 0;
+    glGetFloatv(GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT, &f);
+    if (config.texture.maxAnisotropy > ((int)f))
+    {
+        LOG(LOG_WARNING, "Clamping max anistropy to %ix.\n", (int)f);
+        config.texture.maxAnisotropy = (int)f;
+    }
+
+    //Print some info
+    LOG(LOG_VERBOSE, "Width: %i Height:%i \n", config.framebuffer.width, config.framebuffer.height);
+    LOG(LOG_VERBOSE, "[gles2n64]: Enable Runfast... \n");
+
+    OGL_EnableRunfast();
+    OGL_UpdateScale();
+
+    //We must have a shader bound before binding any textures:
+    ShaderCombiner_Init();
+    ShaderCombiner_Set(EncodeCombineMode(0, 0, 0, TEXEL0, 0, 0, 0, TEXEL0, 0, 0, 0, TEXEL0, 0, 0, 0, TEXEL0));
+    ShaderCombiner_Set(EncodeCombineMode(0, 0, 0, SHADE, 0, 0, 0, 1, 0, 0, 0, SHADE, 0, 0, 0, 1));
+
+    TextureCache_Init();
+
+    memset(OGL.triangles.vertices, 0, VERTBUFF_SIZE * sizeof(SPVertex));
+    memset(OGL.triangles.elements, 0, ELEMBUFF_SIZE * sizeof(GLubyte));
+    OGL.triangles.num = 0;
+
+#ifdef __TRIBUFFER_OPT
+    __indexmap_init();
+#endif
+
+    OGL.frameSkipped = 0;
+    for(int i = 0; i < OGL_FRAMETIME_NUM; i++) OGL.frameTime[i] = 0;
+
+    OGL.renderingToTexture = false;
+    OGL.renderState = RS_NONE;
+    gSP.changed = gDP.changed = 0xFFFFFFFF;
+    VI.displayNum = 0;
+    glGetError();
+
+    return TRUE;
+}
+
+// Releases everything OGL_Start() created: the offscreen framebuffer objects
+// (when enabled), the default shader program, the shader combiner and the
+// texture cache, and finally (USE_SDL builds) the EGL context and the SDL
+// video subsystem.
+void OGL_Stop()
+{
+    LOG(LOG_MINIMAL, "Stopping OpenGL\n");
+
+    if (config.framebuffer.enable)
+    {
+        glDeleteFramebuffers(1, &OGL.framebuffer.fb);
+        glDeleteTextures(1, &OGL.framebuffer.color_buffer);
+        glDeleteRenderbuffers(1, &OGL.framebuffer.depth_buffer);
+    }
+
+    glDeleteShader(OGL.defaultFragShader);
+    glDeleteShader(OGL.defaultVertShader);
+    glDeleteProgram(OGL.defaultProgram);
+
+    ShaderCombiner_Destroy();
+    TextureCache_Destroy();
+
+#ifdef USE_SDL
+    // Tear the context down last: the glDelete* calls above (and the ones the
+    // two Destroy functions presumably make) need it to still be current.
+    EGL_Close();
+    SDL_QuitSubSystem( SDL_INIT_VIDEO );
+#endif
+}
+
+void OGL_UpdateCullFace()
+{
+    if (config.enableFaceCulling && (gSP.geometryMode & G_CULL_BOTH))
+    {
+        glEnable( GL_CULL_FACE );
+        if ((gSP.geometryMode & G_CULL_BACK) && (gSP.geometryMode & G_CULL_FRONT))
+            glCullFace(GL_FRONT_AND_BACK);
+        else if (gSP.geometryMode & G_CULL_BACK)
+            glCullFace(GL_BACK);
+        else
+            glCullFace(GL_FRONT);
+    }
+    else
+        glDisable(GL_CULL_FACE);
+}
+
+// Converts gSP.viewport into a GL viewport: scaled by OGL.scaleX/scaleY,
+// shifted by the framebuffer origin and flipped vertically against VI.height.
+void OGL_UpdateViewport()
+{
+    const int left   = config.framebuffer.xpos + (int)(gSP.viewport.x * OGL.scaleX);
+    const int bottom = config.framebuffer.ypos + (int)((VI.height - (gSP.viewport.y + gSP.viewport.height)) * OGL.scaleY);
+    const int wide   = (int)(gSP.viewport.width * OGL.scaleX);
+    const int high   = (int)(gSP.viewport.height * OGL.scaleY);
+
+    glViewport(left, bottom, wide, high);
+}
+
+// Enables depth writes exactly when gDP.otherMode.depthUpdate is set.
+void OGL_UpdateDepthUpdate()
+{
+    glDepthMask((gDP.otherMode.depthUpdate) ? GL_TRUE : GL_FALSE);
+}
+
+void OGL_UpdateScissor()
+{
+    int x, y, w, h;
+    x = config.framebuffer.xpos + (int)(gDP.scissor.ulx * OGL.scaleX);
+    y = config.framebuffer.ypos + (int)((VI.height - gDP.scissor.lry) * OGL.scaleY);
+    w = (int)((gDP.scissor.lrx - gDP.scissor.ulx) * OGL.scaleX);
+    h = (int)((gDP.scissor.lry - gDP.scissor.uly) * OGL.scaleY);
+    glScissor(x, y, w, h);
+}
+
+// Copied from RICE VIDEO. Selects the GL blend state from gDP.otherMode: the cycle type picks the branch, the upper 16 bits of otherMode.l pick the blend function.
+void OGL_SetBlendMode()
+{
+    // blender = upper half of otherMode.l, split by the masks 0xcccc and 0x3333 (presumably first- and second-cycle fields -- confirm)
+    u32 blender = gDP.otherMode.l >> 16;
+    u32 blendmode_1 = blender&0xcccc;
+    u32 blendmode_2 = blender&0x3333;
+    // Blending starts out enabled; branches that want it off disable it again.
+    glEnable(GL_BLEND);
+    switch(gDP.otherMode.cycleType)
+    {
+        case G_CYC_FILL:
+            glDisable(GL_BLEND);
+            break;
+
+        case G_CYC_COPY:
+            glBlendFunc(GL_ONE, GL_ZERO); // source replaces destination
+            break;
+
+        case G_CYC_2CYCLE:
+            if (gDP.otherMode.forceBlender && gDP.otherMode.depthCompare)
+            {
+                glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
+                break;
+            }
+            // The sum rebuilds the masked blender value; labels pair a BLEND_* constant with another one shifted right by 2.
+            switch(blendmode_1+blendmode_2)
+            {
+                case BLEND_PASS+(BLEND_PASS>>2):    // In * 0 + In * 1
+                case BLEND_FOG_APRIM+(BLEND_PASS>>2):
+                case BLEND_FOG_MEM_FOG_MEM + (BLEND_OPA>>2):
+                case BLEND_FOG_APRIM + (BLEND_OPA>>2):
+                case BLEND_FOG_ASHADE + (BLEND_OPA>>2):
+                case BLEND_BI_AFOG + (BLEND_OPA>>2):
+                case BLEND_FOG_ASHADE + (BLEND_NOOP>>2):
+                case BLEND_NOOP + (BLEND_OPA>>2):
+                case BLEND_NOOP4 + (BLEND_NOOP>>2):
+                case BLEND_FOG_ASHADE+(BLEND_PASS>>2):
+                case BLEND_FOG_3+(BLEND_PASS>>2):
+                    glDisable(GL_BLEND);
+                    break;
+
+                case BLEND_PASS+(BLEND_OPA>>2):
+                    if (gDP.otherMode.cvgXAlpha && gDP.otherMode.alphaCvgSel)
+                        glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
+                    else
+                        glDisable(GL_BLEND);
+                    break;
+
+                case BLEND_PASS + (BLEND_XLU>>2):
+                case BLEND_FOG_ASHADE + (BLEND_XLU>>2):
+                case BLEND_FOG_APRIM + (BLEND_XLU>>2):
+                case BLEND_FOG_MEM_FOG_MEM + (BLEND_PASS>>2):
+                case BLEND_XLU + (BLEND_XLU>>2):
+                case BLEND_BI_AFOG + (BLEND_XLU>>2):
+                case BLEND_XLU + (BLEND_FOG_MEM_IN_MEM>>2):
+                case BLEND_PASS + (BLEND_FOG_MEM_IN_MEM>>2):
+                    glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
+                    break;
+
+                case BLEND_FOG_ASHADE+0x0301:
+                    glBlendFunc(GL_SRC_ALPHA, GL_ZERO);
+                    break;
+
+                case 0x0c08+0x1111:
+                    glBlendFunc(GL_ZERO, GL_DST_ALPHA);
+                    break;
+
+                default: // unlisted combination: decided by the 0x3333 half alone
+                    if (blendmode_2 == (BLEND_PASS>>2))
+                        glDisable(GL_BLEND);
+                    else
+                        glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
+                    break;
+                } // end of the inner switch
+                break; // leaves the G_CYC_2CYCLE case
+    // Any other cycle type: only the 0xcccc half is examined.
+    default:
+
+        if (gDP.otherMode.forceBlender && gDP.otherMode.depthCompare && blendmode_1 != BLEND_FOG_ASHADE )
+        {
+            glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
+            break;
+        }
+
+        switch (blendmode_1)
+        {
+            case BLEND_XLU:
+            case BLEND_BI_AIN:
+            case BLEND_FOG_MEM:
+            case BLEND_FOG_MEM_IN_MEM:
+            case BLEND_BLENDCOLOR:
+            case 0x00c0:
+                glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
+                break;
+
+            case BLEND_MEM_ALPHA_IN:
+                glBlendFunc(GL_ZERO, GL_DST_ALPHA);
+                break;
+
+            case BLEND_OPA:
+                // Disabled per-game hack kept from the original source:
+                //if( options.enableHackForGames == HACK_FOR_MARIO_TENNIS )
+                //{
+                //   glBlendFunc(BLEND_SRCALPHA, BLEND_INVSRCALPHA);
+                //}
+
+                glDisable(GL_BLEND);
+                break;
+
+            case BLEND_PASS:
+            case BLEND_NOOP:
+            case BLEND_FOG_ASHADE:
+            case BLEND_FOG_MEM_3:
+            case BLEND_BI_AFOG:
+                glDisable(GL_BLEND);
+                break;
+
+            case BLEND_FOG_APRIM:
+                glBlendFunc(GL_ONE_MINUS_SRC_ALPHA, GL_ZERO);
+                break;
+
+            case BLEND_NOOP3:
+            case BLEND_NOOP5:
+            case BLEND_MEM:
+                glBlendFunc(GL_ZERO, GL_ONE); // destination is kept unchanged
+                break;
+
+            default:
+                glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
+        }
+    }
+
+}
+
+void OGL_UpdateStates()
+{
+    if (gDP.otherMode.cycleType == G_CYC_COPY)
+        ShaderCombiner_Set(EncodeCombineMode(0, 0, 0, TEXEL0, 0, 0, 0, TEXEL0, 0, 0, 0, TEXEL0, 0, 0, 0, TEXEL0));
+    else if (gDP.otherMode.cycleType == G_CYC_FILL)
+        ShaderCombiner_Set(EncodeCombineMode(0, 0, 0, SHADE, 0, 0, 0, 1, 0, 0, 0, SHADE, 0, 0, 0, 1));
+    else
+        ShaderCombiner_Set(gDP.combine.mux);
+
+#ifdef SHADER_TEST
+    ProgramSwaps += scProgramChanged;
+#endif
+
+    if (gSP.changed & CHANGED_GEOMETRYMODE)
+    {
+        OGL_UpdateCullFace();
+
+        if (gSP.geometryMode & G_ZBUFFER)
+            glEnable(GL_DEPTH_TEST);
+        else
+            glDisable(GL_DEPTH_TEST);
+
+    }
+
+    if (gDP.changed & CHANGED_CONVERT)
+    {
+        SC_SetUniform1f(uK4, gDP.convert.k4);
+        SC_SetUniform1f(uK5, gDP.convert.k5);
+    }
+
+    if (gDP.changed & CHANGED_RENDERMODE || gDP.changed & CHANGED_CYCLETYPE)
+    {
+        if (gDP.otherMode.cycleType == G_CYC_1CYCLE || gDP.otherMode.cycleType == G_CYC_2CYCLE)
+        {
+            //glDepthFunc((gDP.otherMode.depthCompare) ? GL_GEQUAL : GL_ALWAYS);
+            glDepthFunc((gDP.otherMode.depthCompare) ? GL_LESS : GL_ALWAYS);
+            glDepthMask((gDP.otherMode.depthUpdate) ? GL_TRUE : GL_FALSE);
+
+            if (gDP.otherMode.depthMode == ZMODE_DEC)
+                glEnable(GL_POLYGON_OFFSET_FILL);
+           else
+                glDisable(GL_POLYGON_OFFSET_FILL);
+        }
+        else
+        {
+            glDepthFunc(GL_ALWAYS);
+            glDepthMask(GL_FALSE);
+        }
+    }
+
+    if ((gDP.changed & CHANGED_BLENDCOLOR) || (gDP.changed & CHANGED_RENDERMODE))
+        SC_SetUniform1f(uAlphaRef, (gDP.otherMode.cvgXAlpha) ? 0.5f : gDP.blendColor.a);
+
+    if (gDP.changed & CHANGED_SCISSOR)
+        OGL_UpdateScissor();
+
+    if (gSP.changed & CHANGED_VIEWPORT)
+        OGL_UpdateViewport();
+
+    if (gSP.changed & CHANGED_FOGPOSITION)
+    {
+        SC_SetUniform1f(uFogMultiplier, (float) gSP.fog.multiplier / 255.0f);
+        SC_SetUniform1f(uFogOffset, (float) gSP.fog.offset / 255.0f);
+    }
+
+    if (gSP.changed & CHANGED_TEXTURESCALE)
+    {
+        if (scProgramCurrent->usesT0 || scProgramCurrent->usesT1)
+            SC_SetUniform2f(uTexScale, gSP.texture.scales, gSP.texture.scalet);
+    }
+
+    if ((gSP.changed & CHANGED_TEXTURE) || (gDP.changed & CHANGED_TILE) || (gDP.changed & CHANGED_TMEM))
+    {
+        //For some reason updating the texture cache on the first frame of LOZ:OOT causes a NULL Pointer exception...
+        if (scProgramCurrent)
+        {
+            if (scProgramCurrent->usesT0)
+            {
+#ifdef TEXTURECACHE_TEST
+                unsigned t = ticksGetTicks();
+                TextureCache_Update(0);
+                TextureCacheTime += (ticksGetTicks() - t);
+#else
+                TextureCache_Update(0);
+#endif
+                SC_ForceUniform2f(uTexOffset[0], gSP.textureTile[0]->fuls, gSP.textureTile[0]->fult);
+                SC_ForceUniform2f(uCacheShiftScale[0], cache.current[0]->shiftScaleS, cache.current[0]->shiftScaleT);
+                SC_ForceUniform2f(uCacheScale[0], cache.current[0]->scaleS, cache.current[0]->scaleT);
+                SC_ForceUniform2f(uCacheOffset[0], cache.current[0]->offsetS, cache.current[0]->offsetT);
+            }
+            //else TextureCache_ActivateDummy(0);
+
+            //Note: enabling dummies makes some F-zero X textures flicker.... strange.
+
+            if (scProgramCurrent->usesT1)
+            {
+#ifdef TEXTURECACHE_TEST
+                unsigned t = ticksGetTicks();
+                TextureCache_Update(1);
+                TextureCacheTime += (ticksGetTicks() - t);
+#else
+                TextureCache_Update(1);
+#endif
+                SC_ForceUniform2f(uTexOffset[1], gSP.textureTile[1]->fuls, gSP.textureTile[1]->fult);
+                SC_ForceUniform2f(uCacheShiftScale[1], cache.current[1]->shiftScaleS, cache.current[1]->shiftScaleT);
+                SC_ForceUniform2f(uCacheScale[1], cache.current[1]->scaleS, cache.current[1]->scaleT);
+                SC_ForceUniform2f(uCacheOffset[1], cache.current[1]->offsetS, cache.current[1]->offsetT);
+            }
+            //else TextureCache_ActivateDummy(1);
+        }
+    }
+
+    if ((gDP.changed & CHANGED_FOGCOLOR) && config.enableFog)
+        SC_SetUniform4fv(uFogColor, &gDP.fogColor.r );
+
+    if (gDP.changed & CHANGED_ENV_COLOR)
+        SC_SetUniform4fv(uEnvColor, &gDP.envColor.r);
+
+    if (gDP.changed & CHANGED_PRIM_COLOR)
+    {
+        SC_SetUniform4fv(uPrimColor, &gDP.primColor.r);
+        SC_SetUniform1f(uPrimLODFrac, gDP.primColor.l);
+    }
+
+    if ((gDP.changed & CHANGED_RENDERMODE) || (gDP.changed & CHANGED_CYCLETYPE))
+    {
+#ifndef OLD_BLENDMODE
+        OGL_SetBlendMode();
+#else
+        if ((gDP.otherMode.forceBlender) &&
+            (gDP.otherMode.cycleType != G_CYC_COPY) &&
+            (gDP.otherMode.cycleType != G_CYC_FILL) &&
+            !(gDP.otherMode.alphaCvgSel))
+        {
+            glEnable( GL_BLEND );
+
+            switch (gDP.otherMode.l >> 16)
+            {
+                case 0x0448: // Add
+                case 0x055A:
+                    glBlendFunc( GL_ONE, GL_ONE );
+                    break;
+                case 0x0C08: // 1080 Sky
+                case 0x0F0A: // Used LOTS of places
+                    glBlendFunc( GL_ONE, GL_ZERO );
+                    break;
+
+                case 0x0040: // Fzero
+                case 0xC810: // Blends fog
+                case 0xC811: // Blends fog
+                case 0x0C18: // Standard interpolated blend
+                case 0x0C19: // Used for antialiasing
+                case 0x0050: // Standard interpolated blend
+                case 0x0055: // Used for antialiasing
+                    glBlendFunc( GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA );
+                    break;
+
+                case 0x0FA5: // Seems to be doing just blend color - maybe combiner can be used for this?
+                case 0x5055: // Used in Paper Mario intro, I'm not sure if this is right...
+                    glBlendFunc( GL_ZERO, GL_ONE );
+                    break;
+
+                default:
+                    LOG(LOG_VERBOSE, "Unhandled blend mode=%x", gDP.otherMode.l >> 16);
+                    glBlendFunc( GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA );
+                    break;
+            }
+        }
+        else
+        {
+            glDisable( GL_BLEND );
+        }
+
+        if (gDP.otherMode.cycleType == G_CYC_FILL)
+        {
+            glBlendFunc( GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA );
+            glEnable( GL_BLEND );
+        }
+#endif
+    }
+
+    gDP.changed &= CHANGED_TILE | CHANGED_TMEM;
+    gSP.changed &= CHANGED_TEXTURE | CHANGED_MATRIX;
+}
+
+// Empty stub: the body performs no work and every parameter is ignored.
+void OGL_DrawTriangle(SPVertex *vertices, int v0, int v1, int v2)
+{
+
+}
+
+// Queues one triangle by appending its three vertex indices to the pending
+// element list and advancing the element count by three.
+// NOTE(review): no capacity check against ELEMBUFF_SIZE is made here;
+// presumably callers flush with OGL_DrawTriangles() before it fills - confirm.
+void OGL_AddTriangle(int v0, int v1, int v2)
+{
+    GLubyte *slot = &OGL.triangles.elements[OGL.triangles.num];
+
+    slot[0] = v0;
+    slot[1] = v1;
+    slot[2] = v2;
+
+    OGL.triangles.num += 3;
+}
+
+// Turns the SC_COLOR vertex attribute array on when the current shader
+// program reports that it uses per-vertex colour, and off otherwise.
+void OGL_SetColorArray()
+{
+    if (!scProgramCurrent->usesCol)
+    {
+        glDisableVertexAttribArray(SC_COLOR);
+        return;
+    }
+
+    glEnableVertexAttribArray(SC_COLOR);
+}
+
+// Enables or disables the two texture-coordinate attribute arrays so that
+// they match the usesT0 / usesT1 flags of the current shader program.
+void OGL_SetTexCoordArrays()
+{
+    // Texture unit 0 coordinates.
+    if (!scProgramCurrent->usesT0)
+        glDisableVertexAttribArray(SC_TEXCOORD0);
+    else
+        glEnableVertexAttribArray(SC_TEXCOORD0);
+
+    // Texture unit 1 coordinates.
+    if (!scProgramCurrent->usesT1)
+        glDisableVertexAttribArray(SC_TEXCOORD1);
+    else
+        glEnableVertexAttribArray(SC_TEXCOORD1);
+}
+
+// Flushes the queued triangle indices with a single glDrawElements call and
+// resets the queue. When offscreen rendering is being ignored the queue is
+// simply discarded; an empty queue returns without touching GL state.
+void OGL_DrawTriangles()
+{
+    if (OGL.renderingToTexture && config.ignoreOffscreenRendering)
+    {
+        OGL.triangles.num = 0;
+        return;
+    }
+
+    if (OGL.triangles.num == 0) return;
+
+    // In "update at first primitive" mode the pending frame is presented
+    // before anything new is drawn.
+    if ((config.updateMode == SCREEN_UPDATE_AT_1ST_PRIMITIVE) && OGL.screenUpdate)
+        OGL_SwapBuffers();
+
+    if (gSP.changed || gDP.changed)
+        OGL_UpdateStates();
+
+    // Attribute enables depend on the active program, so they are refreshed
+    // when either the render state or the program changed.
+    if (OGL.renderState != RS_TRIANGLE || scProgramChanged)
+    {
+        OGL_SetColorArray();
+        OGL_SetTexCoordArrays();
+        // TEXCOORD1 is switched off unconditionally, overriding the call above.
+        // NOTE(review): presumably the triangle path has no separate second
+        // coordinate array (no pointer is set for it below) - confirm.
+        glDisableVertexAttribArray(SC_TEXCOORD1);
+        SC_ForceUniform1f(uRenderState, RS_TRIANGLE);
+    }
+
+    // Attribute pointers and fixed-function state only need re-specifying
+    // when coming from a different primitive type.
+    if (OGL.renderState != RS_TRIANGLE)
+    {
+#ifdef RENDERSTATE_TEST
+        StateChanges++;
+#endif
+        glVertexAttribPointer(SC_POSITION, 4, GL_FLOAT, GL_FALSE, sizeof(SPVertex), &OGL.triangles.vertices[0].x);
+        glVertexAttribPointer(SC_COLOR, 4, GL_FLOAT, GL_FALSE, sizeof(SPVertex), &OGL.triangles.vertices[0].r);
+        glVertexAttribPointer(SC_TEXCOORD0, 2, GL_FLOAT, GL_FALSE, sizeof(SPVertex), &OGL.triangles.vertices[0].s);
+
+        OGL_UpdateCullFace();
+        OGL_UpdateViewport();
+        glEnable(GL_SCISSOR_TEST);
+        OGL.renderState = RS_TRIANGLE;
+    }
+
+    // The element buffer holds GLubyte indices, hence GL_UNSIGNED_BYTE.
+    glDrawElements(GL_TRIANGLES, OGL.triangles.num, GL_UNSIGNED_BYTE, OGL.triangles.elements);
+    OGL.triangles.num = 0;
+
+#ifdef __TRIBUFFER_OPT
+    __indexmap_clear();
+#endif
+}
+
+// Draws a single line between two entries of the shared triangle vertex
+// buffer.
+//   v0, v1 - indices into OGL.triangles.vertices
+//   width  - line width; multiplied by OGL.scaleX before being passed to GL
+// Returns without drawing when offscreen rendering is being ignored.
+void OGL_DrawLine(int v0, int v1, float width )
+{
+    if (OGL.renderingToTexture && config.ignoreOffscreenRendering) return;
+
+    if ((config.updateMode == SCREEN_UPDATE_AT_1ST_PRIMITIVE) && OGL.screenUpdate)
+        OGL_SwapBuffers();
+
+    if (gSP.changed || gDP.changed)
+        OGL_UpdateStates();
+
+    if (OGL.renderState != RS_LINE || scProgramChanged)
+    {
+#ifdef RENDERSTATE_TEST
+        StateChanges++;
+#endif
+        // Lines carry position and (optionally) colour only; both
+        // texture-coordinate arrays are disabled.
+        OGL_SetColorArray();
+        glDisableVertexAttribArray(SC_TEXCOORD0);
+        glDisableVertexAttribArray(SC_TEXCOORD1);
+        glVertexAttribPointer(SC_POSITION, 4, GL_FLOAT, GL_FALSE, sizeof(SPVertex), &OGL.triangles.vertices[0].x);
+        glVertexAttribPointer(SC_COLOR, 4, GL_FLOAT, GL_FALSE, sizeof(SPVertex), &OGL.triangles.vertices[0].r);
+
+        SC_ForceUniform1f(uRenderState, RS_LINE);
+        OGL_UpdateCullFace();
+        OGL_UpdateViewport();
+        OGL.renderState = RS_LINE;
+    }
+
+    // Two-entry index list built on the stack for this one draw call.
+    unsigned short elem[2];
+    elem[0] = v0;
+    elem[1] = v1;
+    glLineWidth( width * OGL.scaleX );
+    glDrawElements(GL_LINES, 2, GL_UNSIGNED_SHORT, elem);
+}
+
+// Draws a solid-colour rectangle as a four-vertex triangle strip.
+//   ulx, uly - upper-left corner
+//   lrx, lry - lower-right corner; one is added to each before conversion,
+//              so the rectangle includes that row/column
+//   color    - four floats passed to glVertexAttrib4fv as the constant colour
+// The viewport is set to the whole framebuffer and scissor/culling are
+// disabled for the draw; scissor and viewport are restored afterwards.
+void OGL_DrawRect( int ulx, int uly, int lrx, int lry, float *color)
+{
+    if (OGL.renderingToTexture && config.ignoreOffscreenRendering) return;
+
+    if ((config.updateMode == SCREEN_UPDATE_AT_1ST_PRIMITIVE) && OGL.screenUpdate)
+        OGL_SwapBuffers();
+
+    if (gSP.changed || gDP.changed)
+        OGL_UpdateStates();
+
+    // Colour and texture coordinates are not supplied per vertex here.
+    if (OGL.renderState != RS_RECT || scProgramChanged)
+    {
+        glDisableVertexAttribArray(SC_COLOR);
+        glDisableVertexAttribArray(SC_TEXCOORD0);
+        glDisableVertexAttribArray(SC_TEXCOORD1);
+        SC_ForceUniform1f(uRenderState, RS_RECT);
+    }
+
+    if (OGL.renderState != RS_RECT)
+    {
+#ifdef RENDERSTATE_TEST
+        StateChanges++;
+#endif
+        // Only x/y come from the array (2 components) pointing at OGL.rect.
+        glVertexAttrib4f(SC_POSITION, 0, 0, gSP.viewport.nearz, 1.0);
+        glVertexAttribPointer(SC_POSITION, 2, GL_FLOAT, GL_FALSE, sizeof(GLVertex), &OGL.rect[0].x);
+        OGL.renderState = RS_RECT;
+    }
+
+    glViewport(config.framebuffer.xpos, config.framebuffer.ypos, config.framebuffer.width, config.framebuffer.height );
+    glDisable(GL_SCISSOR_TEST);
+    glDisable(GL_CULL_FACE);
+
+    // Map the corners into the [-1, 1] range with y flipped.
+    // NOTE(review): VI.rwidth / VI.rheight are presumably reciprocals of the
+    // VI dimensions - confirm in VI.h.
+    OGL.rect[0].x = (float) ulx * (2.0f * VI.rwidth) - 1.0;
+    OGL.rect[0].y = (float) uly * (-2.0f * VI.rheight) + 1.0;
+    OGL.rect[1].x = (float) (lrx+1) * (2.0f * VI.rwidth) - 1.0;
+    OGL.rect[1].y = OGL.rect[0].y;
+    OGL.rect[2].x = OGL.rect[0].x;
+    OGL.rect[2].y = (float) (lry+1) * (-2.0f * VI.rheight) + 1.0;
+    OGL.rect[3].x = OGL.rect[1].x;
+    OGL.rect[3].y = OGL.rect[2].y;
+
+    glVertexAttrib4fv(SC_COLOR, color);
+    glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+    glEnable(GL_SCISSOR_TEST);
+    OGL_UpdateViewport();
+
+}
+
+// Draws a textured rectangle as a four-vertex triangle strip using up to two
+// texture coordinate sets.
+//   ulx, uly, lrx, lry - rectangle corners
+//   uls, ult, lrs, lrt - texture coordinates at the upper-left / lower-right
+//                        corners (one is added to lrs/lrt before scaling)
+//   flip               - when true, the S/T assignment of the two off-diagonal
+//                        corners is swapped
+// Returns without drawing when the Banjo-Tooie hack matches or when offscreen
+// rendering is being ignored.
+void OGL_DrawTexturedRect( float ulx, float uly, float lrx, float lry, float uls, float ult, float lrs, float lrt, bool flip )
+{
+    // Hack: skip rectangles whose source is an 8-bit CI image with the same
+    // width as the colour image.
+    if (config.hackBanjoTooie)
+    {
+        if (gDP.textureImage.width == gDP.colorImage.width &&
+            gDP.textureImage.format == G_IM_FMT_CI &&
+            gDP.textureImage.size == G_IM_SIZ_8b)
+        {
+            return;
+        }
+    }
+
+    if (OGL.renderingToTexture && config.ignoreOffscreenRendering) return;
+
+    if ((config.updateMode == SCREEN_UPDATE_AT_1ST_PRIMITIVE) && OGL.screenUpdate)
+        OGL_SwapBuffers();
+
+    if (gSP.changed || gDP.changed)
+        OGL_UpdateStates();
+
+    if (OGL.renderState != RS_TEXTUREDRECT || scProgramChanged)
+    {
+        glDisableVertexAttribArray(SC_COLOR);
+        OGL_SetTexCoordArrays();
+        SC_ForceUniform1f(uRenderState, RS_TEXTUREDRECT);
+    }
+
+    if (OGL.renderState != RS_TEXTUREDRECT)
+    {
+#ifdef RENDERSTATE_TEST
+        StateChanges++;
+#endif
+        // Constant colour and depth; only x/y and the texture coordinates are
+        // sourced from OGL.rect.
+        glVertexAttrib4f(SC_COLOR, 0, 0, 0, 0);
+        glVertexAttrib4f(SC_POSITION, 0, 0, (gDP.otherMode.depthSource == G_ZS_PRIM) ? gDP.primDepth.z : gSP.viewport.nearz, 1.0);
+        glVertexAttribPointer(SC_POSITION, 2, GL_FLOAT, GL_FALSE, sizeof(GLVertex), &OGL.rect[0].x);
+        glVertexAttribPointer(SC_TEXCOORD0, 2, GL_FLOAT, GL_FALSE, sizeof(GLVertex), &OGL.rect[0].s0);
+        glVertexAttribPointer(SC_TEXCOORD1, 2, GL_FLOAT, GL_FALSE, sizeof(GLVertex), &OGL.rect[0].s1);
+        OGL.renderState = RS_TEXTUREDRECT;
+    }
+
+    glViewport(config.framebuffer.xpos, config.framebuffer.ypos, config.framebuffer.width, config.framebuffer.height);
+    glDisable(GL_CULL_FACE);
+
+    // Map the corners into the [-1, 1] range with y flipped. Unlike
+    // OGL_DrawRect, nothing is added to lrx/lry here.
+    OGL.rect[0].x = (float) ulx * (2.0f * VI.rwidth) - 1.0f;
+    OGL.rect[0].y = (float) uly * (-2.0f * VI.rheight) + 1.0f;
+    OGL.rect[1].x = (float) (lrx) * (2.0f * VI.rwidth) - 1.0f;
+    OGL.rect[1].y = OGL.rect[0].y;
+    OGL.rect[2].x = OGL.rect[0].x;
+    OGL.rect[2].y = (float) (lry) * (-2.0f * VI.rheight) + 1.0f;
+    OGL.rect[3].x = OGL.rect[1].x;
+    OGL.rect[3].y = OGL.rect[2].y;
+
+    // Coordinate set 0: computed for corners 0 and 3 only; corners 1 and 2
+    // are derived from them at the end of the function.
+    if (scProgramCurrent->usesT0 && cache.current[0] && gSP.textureTile[0])
+    {
+        OGL.rect[0].s0 = uls * cache.current[0]->shiftScaleS - gSP.textureTile[0]->fuls;
+        OGL.rect[0].t0 = ult * cache.current[0]->shiftScaleT - gSP.textureTile[0]->fult;
+        OGL.rect[3].s0 = (lrs + 1.0f) * cache.current[0]->shiftScaleS - gSP.textureTile[0]->fuls;
+        OGL.rect[3].t0 = (lrt + 1.0f) * cache.current[0]->shiftScaleT - gSP.textureTile[0]->fult;
+
+        // When masked, not mirrored, and the start lies on a whole multiple
+        // of the texture size, rebase the range so that it starts at zero.
+        if ((cache.current[0]->maskS) && !(cache.current[0]->mirrorS) && (fmod( OGL.rect[0].s0, cache.current[0]->width ) == 0.0f))
+        {
+            OGL.rect[3].s0 -= OGL.rect[0].s0;
+            OGL.rect[0].s0 = 0.0f;
+        }
+
+        if ((cache.current[0]->maskT)  && !(cache.current[0]->mirrorT) && (fmod( OGL.rect[0].t0, cache.current[0]->height ) == 0.0f))
+        {
+            OGL.rect[3].t0 -= OGL.rect[0].t0;
+            OGL.rect[0].t0 = 0.0f;
+        }
+
+        // Clamp to edge when the whole range fits inside the texture.
+        glActiveTexture( GL_TEXTURE0);
+        if ((OGL.rect[0].s0 >= 0.0f) && (OGL.rect[3].s0 <= cache.current[0]->width))
+            glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
+
+        if ((OGL.rect[0].t0 >= 0.0f) && (OGL.rect[3].t0 <= cache.current[0]->height))
+            glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
+
+        OGL.rect[0].s0 *= cache.current[0]->scaleS;
+        OGL.rect[0].t0 *= cache.current[0]->scaleT;
+        OGL.rect[3].s0 *= cache.current[0]->scaleS;
+        OGL.rect[3].t0 *= cache.current[0]->scaleT;
+    }
+
+    // Coordinate set 1: same computation against cache.current[1] and
+    // gSP.textureTile[1].
+    if (scProgramCurrent->usesT1 && cache.current[1] && gSP.textureTile[1])
+    {
+        OGL.rect[0].s1 = uls * cache.current[1]->shiftScaleS - gSP.textureTile[1]->fuls;
+        OGL.rect[0].t1 = ult * cache.current[1]->shiftScaleT - gSP.textureTile[1]->fult;
+        OGL.rect[3].s1 = (lrs + 1.0f) * cache.current[1]->shiftScaleS - gSP.textureTile[1]->fuls;
+        OGL.rect[3].t1 = (lrt + 1.0f) * cache.current[1]->shiftScaleT - gSP.textureTile[1]->fult;
+
+        if ((cache.current[1]->maskS) && (fmod( OGL.rect[0].s1, cache.current[1]->width ) == 0.0f) && !(cache.current[1]->mirrorS))
+        {
+            OGL.rect[3].s1 -= OGL.rect[0].s1;
+            OGL.rect[0].s1 = 0.0f;
+        }
+
+        if ((cache.current[1]->maskT) && (fmod( OGL.rect[0].t1, cache.current[1]->height ) == 0.0f) && !(cache.current[1]->mirrorT))
+        {
+            OGL.rect[3].t1 -= OGL.rect[0].t1;
+            OGL.rect[0].t1 = 0.0f;
+        }
+
+        // NOTE(review): the clamp test here requires the start to be exactly
+        // 0.0f, whereas set 0 above accepts any start >= 0.0f - confirm
+        // whether the difference is intended.
+        glActiveTexture( GL_TEXTURE1);
+        if ((OGL.rect[0].s1 == 0.0f) && (OGL.rect[3].s1 <= cache.current[1]->width))
+            glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
+
+        if ((OGL.rect[0].t1 == 0.0f) && (OGL.rect[3].t1 <= cache.current[1]->height))
+            glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
+
+        OGL.rect[0].s1 *= cache.current[1]->scaleS;
+        OGL.rect[0].t1 *= cache.current[1]->scaleT;
+        OGL.rect[3].s1 *= cache.current[1]->scaleS;
+        OGL.rect[3].t1 *= cache.current[1]->scaleT;
+    }
+
+    // Copy mode uses nearest filtering on unit 0 unless bilinear is forced.
+    if ((gDP.otherMode.cycleType == G_CYC_COPY) && !config.texture.forceBilinear)
+    {
+        glActiveTexture(GL_TEXTURE0);
+        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST );
+        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST );
+    }
+
+    // Fill in corners 1 and 2 from corners 0 and 3; flipping swaps which of
+    // S and T varies along each edge.
+    if (flip)
+    {
+        OGL.rect[1].s0 = OGL.rect[0].s0;
+        OGL.rect[1].t0 = OGL.rect[3].t0;
+        OGL.rect[1].s1 = OGL.rect[0].s1;
+        OGL.rect[1].t1 = OGL.rect[3].t1;
+        OGL.rect[2].s0 = OGL.rect[3].s0;
+        OGL.rect[2].t0 = OGL.rect[0].t0;
+        OGL.rect[2].s1 = OGL.rect[3].s1;
+        OGL.rect[2].t1 = OGL.rect[0].t1;
+    }
+    else
+    {
+        OGL.rect[1].s0 = OGL.rect[3].s0;
+        OGL.rect[1].t0 = OGL.rect[0].t0;
+        OGL.rect[1].s1 = OGL.rect[3].s1;
+        OGL.rect[1].t1 = OGL.rect[0].t1;
+        OGL.rect[2].s0 = OGL.rect[0].s0;
+        OGL.rect[2].t0 = OGL.rect[3].t0;
+        OGL.rect[2].s1 = OGL.rect[0].s1;
+        OGL.rect[2].t1 = OGL.rect[3].t1;
+    }
+
+    glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+    OGL_UpdateViewport();
+}
+
+void OGL_ClearDepthBuffer()
+{
+    if (OGL.renderingToTexture && config.ignoreOffscreenRendering) return;
+
+    if ((config.updateMode == SCREEN_UPDATE_AT_1ST_PRIMITIVE) && OGL.screenUpdate)
+        OGL_SwapBuffers();
+
+    //float depth = 1.0 - (gDP.fillColor.z / ((float)0x3FFF)); // broken on OMAP3
+    float depth = gDP.fillColor.z ;
+
+/////// paulscode, graphics bug-fixes
+    glDisable( GL_SCISSOR_TEST );
+    glDepthMask( GL_TRUE );  // fixes side-bar graphics glitches
+//    glClearDepthf( depth );  // broken on Qualcomm Adreno
+    glClearDepthf( 1.0f );  // fixes missing graphics on Qualcomm Adreno
+    glClearColor( 0, 0, 0, 1 );
+    glClear( GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT );
+    OGL_UpdateDepthUpdate();
+    glEnable( GL_SCISSOR_TEST );
+////////
+}
+
+// Clears the colour buffer over the whole framebuffer rectangle.
+//   color - four floats (r, g, b, a) used as the clear colour
+// The scissor box is widened to the framebuffer for the clear and then
+// restored with OGL_UpdateScissor(). Returns without clearing when offscreen
+// rendering is being ignored.
+void OGL_ClearColorBuffer( float *color )
+{
+    if (OGL.renderingToTexture && config.ignoreOffscreenRendering)
+        return;
+
+    const bool presentFirst =
+        (config.updateMode == SCREEN_UPDATE_AT_1ST_PRIMITIVE) && OGL.screenUpdate;
+    if (presentFirst)
+        OGL_SwapBuffers();
+
+    glScissor(config.framebuffer.xpos,
+              config.framebuffer.ypos,
+              config.framebuffer.width,
+              config.framebuffer.height);
+    glClearColor( color[0], color[1], color[2], color[3] );
+    glClear( GL_COLOR_BUFFER_BIT );
+
+    OGL_UpdateScissor();
+}
+
+int OGL_CheckError()
+{
+    GLenum e = glGetError();
+    if (e != GL_NO_ERROR)
+    {
+        printf("GL Error: ");
+        switch(e)
+        {
+            case GL_INVALID_ENUM:   printf("INVALID ENUM"); break;
+            case GL_INVALID_VALUE:  printf("INVALID VALUE"); break;
+            case GL_INVALID_OPERATION:  printf("INVALID OPERATION"); break;
+            case GL_OUT_OF_MEMORY:  printf("OUT OF MEMORY"); break;
+        }
+        printf("\n");
+        return 1;
+    }
+    return 0;
+}
+
+// Records the time elapsed since the previous call in OGL.frameTime[0],
+// after shifting the existing history one slot towards the end (the oldest
+// entry is dropped).
+void OGL_UpdateFrameTime()
+{
+    static unsigned lastFrameTicks = 0;
+    const unsigned now = ticksGetTicks();
+
+    // Shift from the back so no entry is overwritten before it is copied.
+    int slot = OGL_FRAMETIME_NUM - 1;
+    while (slot > 0)
+    {
+        OGL.frameTime[slot] = OGL.frameTime[slot - 1];
+        --slot;
+    }
+
+    OGL.frameTime[0] = now - lastFrameTicks;
+    lastFrameTicks = now;
+}
+
+// Presents the current frame.
+// With the intermediate framebuffer enabled, its colour texture is first
+// drawn to the default framebuffer as a full-window quad, the window is
+// swapped, and the intermediate framebuffer, viewport and shader program are
+// restored. Otherwise the window is swapped directly. Afterwards the optional
+// render callback is invoked, OGL.screenUpdate is cleared, and the buffers
+// are cleared when config.forceBufferClear is set.
+void OGL_SwapBuffers()
+{
+    //OGL_DrawTriangles();
+    scProgramChanged = 0;
+    // Disabled once-per-second FPS / statistics logging.
+#if 0
+    static int frames = 0;
+    static unsigned lastTicks = 0;
+    unsigned ticks = ticksGetTicks();
+
+    frames++;
+    if (ticks >= (lastTicks + 1000))
+    {
+
+        float fps = 1000.0f * (float) frames / (ticks - lastTicks);
+        LOG(LOG_MINIMAL, "fps = %.2f \n", fps);
+        LOG(LOG_MINIMAL, "skipped frame = %i of %i \n", OGL.frameSkipped, frames + OGL.frameSkipped);
+
+        OGL.frameSkipped = 0;
+
+#ifdef BATCH_TEST
+        LOG(LOG_MINIMAL, "time spent in draw calls per frame = %.2f ms\n", (float)TotalDrawTime / frames);
+        LOG(LOG_MINIMAL, "average draw calls per frame = %.0f\n", (float)TotalDrawCalls / frames);
+        LOG(LOG_MINIMAL, "average vertices per draw call = %.2f\n", (float)TotalTriangles / TotalDrawCalls);
+        TotalDrawCalls = 0;
+        TotalTriangles = 0;
+        TotalDrawTime = 0;
+#endif
+
+#ifdef SHADER_TEST
+        LOG(LOG_MINIMAL, "average shader changes per frame = %f\n", (float)ProgramSwaps / frames);
+        ProgramSwaps = 0;
+#endif
+
+#ifdef TEXTURECACHE_TEST
+        LOG(LOG_MINIMAL, "texture cache time per frame: %.2f ms\n", (float)TextureCacheTime/ frames);
+        LOG(LOG_MINIMAL, "texture cache per frame: hits=%.2f misses=%.2f\n", (float)cache.hits / frames,
+                (float)cache.misses / frames);
+        cache.hits = cache.misses = 0;
+        TextureCacheTime = 0;
+
+#endif
+        frames = 0;
+        lastTicks = ticks;
+    }
+#endif
+
+
+    // Optional GBI profiling dump, printed at most once every 5000 ticks.
+#ifdef PROFILE_GBI
+    u32 profileTicks = ticksGetTicks();
+    static u32 profileLastTicks = 0;
+    if (profileTicks >= (profileLastTicks + 5000))
+    {
+        LOG(LOG_MINIMAL, "GBI PROFILE DATA: %i ms \n", profileTicks - profileLastTicks);
+        LOG(LOG_MINIMAL, "=========================================================\n");
+        GBI_ProfilePrint(stdout);
+        LOG(LOG_MINIMAL, "=========================================================\n");
+        GBI_ProfileReset();
+        profileLastTicks = profileTicks;
+    }
+#endif
+
+    if (config.framebuffer.enable)
+    {
+        // Draw into the default framebuffer (0) for presentation.
+        glBindFramebuffer(GL_FRAMEBUFFER, 0);
+        glClearColor( 0, 0, 0, 1 );
+        glClear( GL_COLOR_BUFFER_BIT );
+
+        glUseProgram(OGL.defaultProgram);
+        glDisable(GL_SCISSOR_TEST);
+        glDisable(GL_DEPTH_TEST);
+        glViewport(config.window.xpos, config.window.ypos, config.window.width, config.window.height);
+
+        // Interleaved quad: x, y, s, t per vertex, in triangle-strip order.
+        static const float vert[] =
+        {
+            -1.0, -1.0, +0.0, +0.0,
+            +1.0, -1.0, +1.0, +0.0,
+            -1.0, +1.0, +0.0, +1.0,
+            +1.0, +1.0, +1.0, +1.0
+        };
+
+        glActiveTexture(GL_TEXTURE0);
+        glBindTexture(GL_TEXTURE_2D, OGL.framebuffer.color_buffer);
+        if (config.framebuffer.bilinear)
+        {
+            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+        }
+        else
+        {
+            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+        }
+
+        // Attribute 0 reads the position pair, attribute 1 the texture pair.
+        glEnableVertexAttribArray(0);
+        glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (float*)vert);
+        glEnableVertexAttribArray(1);
+        glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (float*)vert + 2);
+        glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+
+        Android_JNI_SwapWindow(); // paulscode, fix for black-screen bug
+
+        // Restore rendering into the intermediate framebuffer; resetting
+        // renderState makes the next draw call re-specify its attributes.
+        glBindFramebuffer(GL_FRAMEBUFFER, OGL.framebuffer.fb);
+        OGL_UpdateViewport();
+        if (scProgramCurrent) glUseProgram(scProgramCurrent->program);
+        OGL.renderState = RS_NONE;
+    }
+    else
+    {
+        Android_JNI_SwapWindow(); // paulscode, fix for black-screen bug
+    }
+
+    // If the emulator defined a render callback function, call it.
+    // NOTE(review): in this version the call happens after
+    // Android_JNI_SwapWindow() on both paths, not before the swap - confirm
+    // that this ordering is intended.
+    if (renderCallback) (*renderCallback)();
+
+    OGL.screenUpdate = false;
+
+    if (config.forceBufferClear)
+    {
+/////// paulscode, graphics bug-fixes
+    // NOTE(review): 'depth' is assigned but never used; the clear depth is
+    // the constant 1.0f below.
+    float depth = gDP.fillColor.z ;
+    glDisable( GL_SCISSOR_TEST );
+    glDepthMask( GL_TRUE );  // fixes side-bar graphics glitches
+//    glClearDepthf( depth );  // broken on Qualcomm Adreno
+    glClearDepthf( 1.0f );  // fixes missing graphics on Qualcomm Adreno
+    glClearColor( 0, 0, 0, 1 );
+    glClear( GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT );
+    OGL_UpdateDepthUpdate();
+    glEnable( GL_SCISSOR_TEST );
+///////
+    }
+
+}
+
+void OGL_ReadScreen( void *dest, int *width, int *height )
+{
+    if (width)
+        *width = config.framebuffer.width;
+    if (height)
+        *height = config.framebuffer.height;
+
+    if (dest == NULL)
+        return;
+
+    glReadPixels( config.framebuffer.xpos, config.framebuffer.ypos,
+            config.framebuffer.width, config.framebuffer.height,
+            GL_RGBA, GL_UNSIGNED_BYTE, dest );
+}
+
diff --git a/source/gles2n64/src/OpenGL.h b/source/gles2n64/src/OpenGL.h
new file mode 100644 (file)
index 0000000..3db0743
--- /dev/null
@@ -0,0 +1,171 @@
+#ifndef OPENGL_H
+#define OPENGL_H
+
+#include <GLES2/gl2.h>
+#include <GLES2/gl2ext.h>
+#include "gSP.h"
+
+#ifdef USE_SDL
+//    #include <EGL/egl.h>  // Android 2.3 only
+//    #include <GLES2/gl2extimg.h>
+    #include <SDL.h>
+#endif
+
+// Fallback min/max macros, defined only when not already provided.
+// Each argument may be evaluated twice, so avoid arguments with side effects.
+#ifndef min
+#define min(a,b) ((a) < (b) ? (a) : (b))
+#endif
+#ifndef max
+#define max(a,b) ((a) > (b) ? (a) : (b))
+#endif
+
+// Values stored in GLInfo::renderState: the primitive type the GL vertex
+// attribute setup is currently configured for (RS_NONE forces a re-setup).
+#define RS_NONE         0
+#define RS_TRIANGLE     1
+#define RS_RECT         2
+#define RS_TEXTUREDRECT 3
+#define RS_LINE         4
+
+
+// Values compared against config.updateMode to decide at which point the
+// frame is presented (OGL_SwapBuffers).
+#define SCREEN_UPDATE_AT_VI_UPDATE              1
+#define SCREEN_UPDATE_AT_VI_CHANGE              2
+#define SCREEN_UPDATE_AT_CI_CHANGE              3
+#define SCREEN_UPDATE_AT_1ST_CI_CHANGE          4
+#define SCREEN_UPDATE_AT_1ST_PRIMITIVE          5
+#define SCREEN_UPDATE_BEFORE_SCREEN_CLEAR       6
+#define SCREEN_UPDATE_AT_VI_UPDATE_AND_DRAWN    7
+
+
+#define BLEND_NOOP              0x0000
+#define BLEND_NOOP5             0xcc48  // Fog * 0 + Mem * 1
+#define BLEND_NOOP4             0xcc08  // Fog * 0 + In * 1
+#define BLEND_FOG_ASHADE        0xc800
+#define BLEND_FOG_3             0xc000  // Fog * AIn + In * 1-A
+#define BLEND_FOG_MEM           0xc440  // Fog * AFog + Mem * 1-A
+#define BLEND_FOG_APRIM         0xc400  // Fog * AFog + In * 1-A
+#define BLEND_BLENDCOLOR        0x8c88
+#define BLEND_BI_AFOG           0x8400  // Bl * AFog + In * 1-A
+#define BLEND_BI_AIN            0x8040  // Bl * AIn + Mem * 1-A
+#define BLEND_MEM               0x4c40  // Mem*0 + Mem*(1-0)?!
+#define BLEND_FOG_MEM_3         0x44c0  // Mem * AFog + Fog * 1-A
+#define BLEND_NOOP3             0x0c48  // In * 0 + Mem * 1
+#define BLEND_PASS              0x0c08  // In * 0 + In * 1
+#define BLEND_FOG_MEM_IN_MEM    0x0440  // In * AFog + Mem * 1-A
+#define BLEND_FOG_MEM_FOG_MEM   0x04c0  // In * AFog + Fog * 1-A
+#define BLEND_OPA               0x0044  //  In * AIn + Mem * AMem
+#define BLEND_XLU               0x0040
+#define BLEND_MEM_ALPHA_IN      0x4044  //  Mem * AIn + Mem * AMem
+
+
+#define OGL_FRAMETIME_NUM       8
+
+// Vertex layout used for rectangle drawing (see GLInfo::rect).
+struct GLVertex
+{
+    // Position.
+    float x, y, z, w;
+    // Primary and secondary colour.
+    struct
+    {
+        float r, g, b, a;
+    } color, secondaryColor;
+    // Texture coordinates for texture units 0 and 1.
+    float s0, t0, s1, t1;
+};
+
+// RGBA colour with float components.
+struct GLcolor
+{
+    float r, g, b, a;
+};
+
+// Global renderer state; a single instance is exposed as OGL.
+struct GLInfo
+{
+#ifdef USE_SDL
+// TODO: More EGL stuff, need to do this in Java
+    SDL_Surface *hScreen;  // TODO: Do we really need this?  Only using it in one place AFAICT..
+/*
+    struct
+    {
+        EGLint                     version_major, version_minor;
+        EGLDisplay              display;
+        EGLContext              context;
+        EGLConfig               config;
+        EGLSurface              surface;
+        EGLNativeDisplayType    device;
+        EGLNativeWindowType     handle;
+    } EGL;
+*/
+#endif
+
+    // Checked together with config.updateMode before drawing; cleared by
+    // OGL_SwapBuffers().
+    bool    screenUpdate;
+
+    // GL object names for the intermediate framebuffer and its attachments.
+    struct
+    {
+        GLuint fb,depth_buffer, color_buffer;
+    } framebuffer;
+
+
+    int     frameSkipped;
+    unsigned consecutiveSkips;
+    // Recent frame durations in ticks, newest first (see OGL_UpdateFrameTime).
+    unsigned frameTime[OGL_FRAMETIME_NUM];
+
+    int     frame_vsync, frame_actual, frame_dl;
+    int     frame_prevdl;
+    int     mustRenderDlist;
+    // Non-zero while drawing offscreen; draw calls are skipped when
+    // config.ignoreOffscreenRendering is also set.
+    int     renderingToTexture;
+
+
+    // Program used by OGL_SwapBuffers() to draw the framebuffer texture.
+    GLint   defaultProgram;
+    GLint   defaultVertShader;
+    GLint   defaultFragShader;
+
+    float   scaleX, scaleY;
+
+#define INDEXMAP_SIZE 64
+#define VERTBUFF_SIZE 256
+#define ELEMBUFF_SIZE 1024
+
+    // Pending triangle batch: vertex storage, byte-sized index list and the
+    // number of indices currently queued.
+    struct {
+        SPVertex    vertices[VERTBUFF_SIZE];
+        GLubyte     elements[ELEMBUFF_SIZE];
+        int         num;
+
+//#ifdef __TRIBUFFER_OPT
+
+        u32     indexmap[INDEXMAP_SIZE];
+        u32     indexmapinv[VERTBUFF_SIZE];
+        u32     indexmap_prev;
+        u32     indexmap_nomap;
+//#endif
+
+    } triangles;
+
+
+    // One of the RS_* values.
+    unsigned int    renderState;
+
+    // Four corners used by OGL_DrawRect / OGL_DrawTexturedRect.
+    GLVertex rect[4];
+};
+
+extern GLInfo OGL;
+
+bool OGL_Start();
+void OGL_Stop();
+
+void OGL_AddTriangle(int v0, int v1, int v2);
+void OGL_DrawTriangles();
+void OGL_DrawTriangle(SPVertex *vertices, int v0, int v1, int v2);
+void OGL_DrawLine(int v0, int v1, float width);
+void OGL_DrawRect(int ulx, int uly, int lrx, int lry, float *color);
+void OGL_DrawTexturedRect(float ulx, float uly, float lrx, float lry, float uls, float ult, float lrs, float lrt, bool flip );
+
+void OGL_UpdateFrameTime();
+void OGL_UpdateScale();
+void OGL_UpdateStates();
+void OGL_UpdateViewport();
+void OGL_UpdateScissor();
+void OGL_UpdateCullFace();
+
+void OGL_ClearDepthBuffer();
+void OGL_ClearColorBuffer(float *color);
+void OGL_ResizeWindow(int x, int y, int width, int height);
+void OGL_SwapBuffers();
+void OGL_ReadScreen( void *dest, int *width, int *height );
+
+int  OGL_CheckError();
+int  OGL_IsExtSupported( const char *extension );
+#endif
+
diff --git a/source/gles2n64/src/RDP.cpp b/source/gles2n64/src/RDP.cpp
new file mode 100644 (file)
index 0000000..d3ffdd9
--- /dev/null
@@ -0,0 +1,347 @@
+#include "N64.h"
+#include "RSP.h"
+#include "GBI.h"
+#include "gDP.h"
+#include "Types.h"
+#include "Debug.h"
+#include "Common.h"
+#include "gSP.h"
+
+// Handler for unassigned RDP opcodes: ignores the command.
+void RDP_Unknown( u32 w0, u32 w1 )
+{
+}
+
+// No-op command: forwards to gSPNoOp(); both command words are unused.
+void RDP_NoOp( u32 w0, u32 w1 )
+{
+    gSPNoOp();
+}
+
+// Decodes a set-colour-image command. The width field is stored minus one,
+// so one is added back; w1 is passed through as the image address.
+void RDP_SetCImg( u32 w0, u32 w1 )
+{
+    gDPSetColorImage( _SHIFTR( w0, 21,  3 ),        // fmt
+                      _SHIFTR( w0, 19,  2 ),        // siz
+                      _SHIFTR( w0,  0, 12 ) + 1,    // width
+                      w1 );                         // img
+}
+
+// Decodes a set-depth-image command; w1 is the image address, w0 is unused.
+void RDP_SetZImg( u32 w0, u32 w1 )
+{
+    gDPSetDepthImage( w1 ); // img
+}
+
+// Decodes a set-texture-image command. The width field is stored minus one,
+// so one is added back; w1 is passed through as the image address.
+void RDP_SetTImg( u32 w0, u32 w1 )
+{
+    gDPSetTextureImage( _SHIFTR( w0, 21,  3),       // fmt
+                        _SHIFTR( w0, 19,  2 ),      // siz
+                        _SHIFTR( w0,  0, 12 ) + 1,  // width
+                        w1 );                       // img
+}
+
+// Decodes a set-combine command: the low 24 bits of w0 and all of w1 form
+// the two combiner mux words.
+void RDP_SetCombine( u32 w0, u32 w1 )
+{
+    gDPSetCombine( _SHIFTR( w0, 0, 24 ),    // muxs0
+                   w1 );                    // muxs1
+}
+
+// Decodes a set-environment-colour command: w1 holds 8-bit r, g, b, a from
+// the most significant byte down.
+void RDP_SetEnvColor( u32 w0, u32 w1 )
+{
+    gDPSetEnvColor( _SHIFTR( w1, 24, 8 ),       // r
+                    _SHIFTR( w1, 16, 8 ),       // g
+                    _SHIFTR( w1,  8, 8 ),       // b
+                    _SHIFTR( w1,  0, 8 ) );     // a
+}
+
+void RDP_SetPrimColor( u32 w0, u32 w1 )
+{
+    gDPSetPrimColor( _SHIFTL( w0,  8, 8 ),      // m
+                     _SHIFTL( w0,  0, 8 ),      // l
+                     _SHIFTR( w1, 24, 8 ),      // r
+                     _SHIFTR( w1, 16, 8 ),      // g
+                     _SHIFTR( w1,  8, 8 ),      // b
+                     _SHIFTR( w1,  0, 8 ) );    // a
+
+}
+
+// Decodes a set-blend-colour command: w1 holds 8-bit r, g, b, a from the
+// most significant byte down.
+void RDP_SetBlendColor( u32 w0, u32 w1 )
+{
+    gDPSetBlendColor( _SHIFTR( w1, 24, 8 ),     // r
+                      _SHIFTR( w1, 16, 8 ),     // g
+                      _SHIFTR( w1,  8, 8 ),     // b
+                      _SHIFTR( w1,  0, 8 ) );   // a
+}
+
+// Decodes a set-fog-colour command: w1 holds 8-bit r, g, b, a from the most
+// significant byte down.
+void RDP_SetFogColor( u32 w0, u32 w1 )
+{
+    gDPSetFogColor( _SHIFTR( w1, 24, 8 ),       // r
+                    _SHIFTR( w1, 16, 8 ),       // g
+                    _SHIFTR( w1,  8, 8 ),       // b
+                    _SHIFTR( w1,  0, 8 ) );     // a
+}
+
+// Decodes a set-fill-colour command: w1 is passed through unchanged.
+void RDP_SetFillColor( u32 w0, u32 w1 )
+{
+    gDPSetFillColor( w1 );
+}
+
+// Decodes a fill-rectangle command. Each coordinate is taken as the upper
+// 10 bits of a 12-bit field (the low 2 bits are skipped); the upper-left
+// corner is in w1 and the lower-right corner in w0.
+void RDP_FillRect( u32 w0, u32 w1 )
+{
+    gDPFillRectangle( _SHIFTR( w1, 14, 10 ),    // ulx
+                      _SHIFTR( w1,  2, 10 ),    // uly
+                      _SHIFTR( w0, 14, 10 ),    // lrx
+                      _SHIFTR( w0,  2, 10 ) );  // lry
+}
+
+// Decodes a set-tile command: format, size, line and TMEM address come from
+// w0; the tile index, palette and the per-axis clamp/mirror, mask and shift
+// fields come from w1.
+void RDP_SetTile( u32 w0, u32 w1 )
+{
+
+    gDPSetTile( _SHIFTR( w0, 21, 3 ),   // fmt
+                _SHIFTR( w0, 19, 2 ),   // siz
+                _SHIFTR( w0,  9, 9 ),   // line
+                _SHIFTR( w0,  0, 9 ),   // tmem
+                _SHIFTR( w1, 24, 3 ),   // tile
+                _SHIFTR( w1, 20, 4 ),   // palette
+                _SHIFTR( w1, 18, 2 ),   // cmt
+                _SHIFTR( w1,  8, 2 ),   // cms
+                _SHIFTR( w1, 14, 4 ),   // maskt
+                _SHIFTR( w1,  4, 4 ),   // masks
+                _SHIFTR( w1, 10, 4 ),   // shiftt
+                _SHIFTR( w1,  0, 4 ) ); // shifts
+}
+
+// Decodes a load-tile command: tile index plus four 12-bit coordinates
+// (upper-left in w0, lower-right in w1).
+void RDP_LoadTile( u32 w0, u32 w1 )
+{
+    gDPLoadTile( _SHIFTR( w1, 24,  3 ),     // tile
+                 _SHIFTR( w0, 12, 12 ),     // uls
+                 _SHIFTR( w0,  0, 12 ),     // ult
+                 _SHIFTR( w1, 12, 12 ),     // lrs
+                 _SHIFTR( w1,  0, 12 ) );   // lrt
+}
+
+// Decodes a load-block command: tile index, upper-left s/t, lower-right s,
+// and dxt in the low 12 bits of w1.
+void RDP_LoadBlock( u32 w0, u32 w1 )
+{
+    gDPLoadBlock( _SHIFTR( w1, 24,  3 ),    // tile
+                  _SHIFTR( w0, 12, 12 ),    // uls
+                  _SHIFTR( w0,  0, 12 ),    // ult
+                  _SHIFTR( w1, 12, 12 ),    // lrs
+                  _SHIFTR( w1,  0, 12 ) );  // dxt
+}
+
+// Decodes a set-tile-size command: tile index plus four 12-bit coordinates
+// (upper-left in w0, lower-right in w1).
+void RDP_SetTileSize( u32 w0, u32 w1 )
+{
+    gDPSetTileSize( _SHIFTR( w1, 24,  3 ),      // tile
+                    _SHIFTR( w0, 12, 12 ),      // uls
+                    _SHIFTR( w0,  0, 12 ),      // ult
+                    _SHIFTR( w1, 12, 12 ),      // lrs
+                    _SHIFTR( w1,  0, 12 ) );    // lrt
+}
+
+// Decodes a load-TLUT (palette) command: tile index plus four 12-bit
+// coordinates (upper-left in w0, lower-right in w1).
+void RDP_LoadTLUT( u32 w0, u32 w1 )
+{
+    gDPLoadTLUT( _SHIFTR( w1, 24,  3 ), // tile
+                  _SHIFTR( w0, 12, 12 ),    // uls
+                  _SHIFTR( w0,  0, 12 ),    // ult
+                  _SHIFTR( w1, 12, 12 ),    // lrs
+                  _SHIFTR( w1,  0, 12 ) );  // lrt
+}
+
+// Decodes a set-other-mode command: the low 24 bits of w0 and all of w1 are
+// the two mode words.
+void RDP_SetOtherMode( u32 w0, u32 w1 )
+{
+    gDPSetOtherMode( _SHIFTR( w0, 0, 24 ),  // mode0
+                     w1 );                  // mode1
+}
+
+// Decodes a set-primitive-depth command: z is the high half of w1 and dz the
+// low half.
+void RDP_SetPrimDepth( u32 w0, u32 w1 )
+{
+    gDPSetPrimDepth( _SHIFTR( w1, 16, 16 ),     // z
+                     _SHIFTR( w1,  0, 16 ) );   // dz
+}
+
+// Decodes a set-scissor command: a 2-bit mode plus four 12-bit coordinates
+// converted with _FIXED2FLOAT using 2 fractional bits.
+void RDP_SetScissor( u32 w0, u32 w1 )
+{
+    gDPSetScissor( _SHIFTR( w1, 24, 2 ),                        // mode
+                   _FIXED2FLOAT( _SHIFTR( w0, 12, 12 ), 2 ),    // ulx
+                   _FIXED2FLOAT( _SHIFTR( w0,  0, 12 ), 2 ),    // uly
+                   _FIXED2FLOAT( _SHIFTR( w1, 12, 12 ), 2 ),    // lrx
+                   _FIXED2FLOAT( _SHIFTR( w1,  0, 12 ), 2 ) );  // lry
+}
+
+void RDP_SetConvert( u32 w0, u32 w1 )
+{
+    gDPSetConvert( _SHIFTR( w0, 13, 9 ),    // k0
+                   _SHIFTR( w0,  4, 9 ),    // k1
+                   _SHIFTL( w0,  5, 4 ) | _SHIFTR( w1, 25, 5 ), // k2
+                   _SHIFTR( w1, 18, 9 ),    // k3
+                   _SHIFTR( w1,  9, 9 ),    // k4
+                   _SHIFTR( w1,  0, 9 ) );  // k5
+}
+
+// Decodes the red-channel chroma-key command: centre, scale and width fields
+// are all taken from w1.
+void RDP_SetKeyR( u32 w0, u32 w1 )
+{
+    gDPSetKeyR( _SHIFTR( w1,  8,  8 ),      // cR
+                _SHIFTR( w1,  0,  8 ),      // sR
+                _SHIFTR( w1, 16, 12 ) );    // wR
+}
+
+// Decodes the green/blue chroma-key command: centre and scale fields come
+// from w1, the two 12-bit width fields from w0.
+void RDP_SetKeyGB( u32 w0, u32 w1 )
+{
+    gDPSetKeyGB( _SHIFTR( w1, 24,  8 ),     // cG
+                 _SHIFTR( w1, 16,  8 ),     // sG
+                 _SHIFTR( w0, 12, 12 ),     // wG
+                 _SHIFTR( w1,  8,  8 ),     // cB
+                 _SHIFTR( w1,  0,  8 ),     // sB
+                 _SHIFTR( w0,  0, 12 ) );   // wB
+}
+
+// Full-sync command: forwards to gDPFullSync(); both command words are
+// unused.
+void RDP_FullSync( u32 w0, u32 w1 )
+{
+    gDPFullSync();
+}
+
+// Tile-sync command: currently a no-op (the gDP call is commented out).
+void RDP_TileSync( u32 w0, u32 w1 )
+{
+//    gDPTileSync();
+}
+
+// Pipe-sync command: currently a no-op (the gDP call is commented out).
+void RDP_PipeSync( u32 w0, u32 w1 )
+{
+//    gDPPipeSync();
+}
+
+// Load-sync command: currently a no-op (the gDP call is commented out).
+void RDP_LoadSync( u32 w0, u32 w1 )
+{
+//    gDPLoadSync();
+}
+
+// Decodes a flipped texture-rectangle command. The command spans three
+// 8-byte display-list entries: the second word of each of the next two
+// entries (w2, w3) is read from RDRAM and the current program counter is
+// advanced past both.
+void RDP_TexRectFlip( u32 w0, u32 w1 )
+{
+    // Texture start coordinates: s in the high half, t in the low half.
+    u32 w2 = *(u32*)&RDRAM[RSP.PC[RSP.PCi] + 4];
+    RSP.PC[RSP.PCi] += 8;
+
+    // Texture coordinate steps, one per half-word.
+    u32 w3 = *(u32*)&RDRAM[RSP.PC[RSP.PCi] + 4];
+    RSP.PC[RSP.PCi] += 8;
+
+    // Screen coordinates have 2 fractional bits, s/t 5, and the steps 10.
+    gDPTextureRectangleFlip( _FIXED2FLOAT( _SHIFTR( w1, 12, 12 ), 2 ),          // ulx
+                             _FIXED2FLOAT( _SHIFTR( w1,  0, 12 ), 2 ),          // uly
+                             _FIXED2FLOAT( _SHIFTR( w0, 12, 12 ), 2 ),          // lrx
+                             _FIXED2FLOAT( _SHIFTR( w0,  0, 12 ), 2 ),          // lry
+                             _SHIFTR( w1, 24,  3 ),                             // tile
+                             _FIXED2FLOAT( (s16)_SHIFTR( w2, 16, 16 ), 5 ),     // s
+                             _FIXED2FLOAT( (s16)_SHIFTR( w2,  0, 16 ), 5 ),     // t
+                             _FIXED2FLOAT( (s16)_SHIFTR( w3, 16, 16 ), 10 ),    // dsdx
+                             _FIXED2FLOAT( (s16)_SHIFTR( w3,  0, 16 ), 10 ) );  // dtdy
+}
+
+// Decodes a texture-rectangle command. The command spans three 8-byte
+// display-list entries: the second word of each of the next two entries
+// (w2, w3) is read from RDRAM and the current program counter is advanced
+// past both.
+void RDP_TexRect( u32 w0, u32 w1 )
+{
+    // Texture start coordinates: s in the high half, t in the low half.
+    u32 w2 = *(u32*)&RDRAM[RSP.PC[RSP.PCi] + 4];
+    RSP.PC[RSP.PCi] += 8;
+
+    // Texture coordinate steps, one per half-word.
+    u32 w3 = *(u32*)&RDRAM[RSP.PC[RSP.PCi] + 4];
+    RSP.PC[RSP.PCi] += 8;
+
+    // Screen coordinates have 2 fractional bits, s/t 5, and the steps 10.
+    gDPTextureRectangle( _FIXED2FLOAT( _SHIFTR( w1, 12, 12 ), 2 ),          // ulx
+                         _FIXED2FLOAT( _SHIFTR( w1,  0, 12 ), 2 ),          // uly
+                         _FIXED2FLOAT( _SHIFTR( w0, 12, 12 ), 2 ),          // lrx
+                         _FIXED2FLOAT( _SHIFTR( w0,  0, 12 ), 2 ),          // lry
+                         _SHIFTR( w1, 24,  3 ),                             // tile
+                         _FIXED2FLOAT( (s16)_SHIFTR( w2, 16, 16 ), 5 ),     // s
+                         _FIXED2FLOAT( (s16)_SHIFTR( w2,  0, 16 ), 5 ),     // t
+                         _FIXED2FLOAT( (s16)_SHIFTR( w3, 16, 16 ), 10 ),    // dsdx
+                         _FIXED2FLOAT( (s16)_SHIFTR( w3,  0, 16 ), 10 ) );  // dtdy
+}
+
+
+//Low Level RDP Drawing Commands:
+// Low-level fill triangle: not rendered, only logged at verbose level.
+void RDP_TriFill(u32 w0, u32 w1)
+{
+    LOG(LOG_VERBOSE, "RSP_TRI_FILL Command\n");
+}
+
+// Low-level z-buffered fill triangle: not rendered, only logged at verbose
+// level.
+void RDP_TriFillZBuff(u32 w0, u32 w1)
+{
+    LOG(LOG_VERBOSE, "RSP_TRI_FILL_ZBUFF Command\n");
+}
+
+// Low-level textured triangle: not rendered, only logged at verbose level.
+void RDP_TriTxtr(u32 w0, u32 w1)
+{
+    LOG(LOG_VERBOSE, "RSP_TRI_TXTR Command\n");
+}
+
+// Low-level z-buffered textured triangle: not rendered, only logged at
+// verbose level.
+void RDP_TriTxtrZBuff(u32 w0, u32 w1)
+{
+    LOG(LOG_VERBOSE, "RSP_TRI_TXTR_ZBUFF Command\n");
+}
+
+// Low-level shaded triangle: not rendered, only logged at verbose level.
+void RDP_TriShade(u32 w0, u32 w1)
+{
+    LOG(LOG_VERBOSE, "RSP_TRI_SHADE Command\n");
+}
+
+// Low-level z-buffered shaded triangle: not rendered, only logged at verbose
+// level.
+void RDP_TriShadeZBuff(u32 w0, u32 w1)
+{
+    LOG(LOG_VERBOSE, "RSP_TRI_SHADE_ZBUFF Command\n");
+}
+
+// Low-level shaded, textured triangle: not rendered, only logged at verbose
+// level.
+void RDP_TriShadeTxtr(u32 w0, u32 w1)
+{
+    LOG(LOG_VERBOSE, "RSP_TRI_SHADE_TXTR Command\n");
+}
+
+// Low-level z-buffered, shaded, textured triangle: not rendered, only logged
+// at verbose level.
+void RDP_TriShadeTxtrZBuff(u32 w0, u32 w1)
+{
+    LOG(LOG_VERBOSE, "RSP_TRI_SHADE_TXTR_ZBUFF Command\n");
+}
+
+void RDP_Init()
+{
+    // Initialize RDP commands to RDP_UNKNOWN
+    for (int i = 0xC8; i <= 0xCF; i++)
+        GBI.cmd[i] = RDP_Unknown;
+
+    // Initialize RDP commands to RDP_UNKNOWN
+    for (int i = 0xE4; i <= 0xFF; i++)
+        GBI.cmd[i] = RDP_Unknown;
+
+    // Set known GBI commands
+    GBI.cmd[G_NOOP]             = RDP_NoOp;
+    GBI.cmd[G_SETCIMG]          = RDP_SetCImg;
+    GBI.cmd[G_SETZIMG]          = RDP_SetZImg;
+    GBI.cmd[G_SETTIMG]          = RDP_SetTImg;
+    GBI.cmd[G_SETCOMBINE]       = RDP_SetCombine;
+    GBI.cmd[G_SETENVCOLOR]      = RDP_SetEnvColor;
+    GBI.cmd[G_SETPRIMCOLOR]     = RDP_SetPrimColor;
+    GBI.cmd[G_SETBLENDCOLOR]    = RDP_SetBlendColor;
+    GBI.cmd[G_SETFOGCOLOR]      = RDP_SetFogColor;
+    GBI.cmd[G_SETFILLCOLOR]     = RDP_SetFillColor;
+    GBI.cmd[G_FILLRECT]         = RDP_FillRect;
+    GBI.cmd[G_SETTILE]          = RDP_SetTile;
+    GBI.cmd[G_LOADTILE]         = RDP_LoadTile;
+    GBI.cmd[G_LOADBLOCK]        = RDP_LoadBlock;
+    GBI.cmd[G_SETTILESIZE]      = RDP_SetTileSize;
+    GBI.cmd[G_LOADTLUT]         = RDP_LoadTLUT;
+    GBI.cmd[G_RDPSETOTHERMODE]  = RDP_SetOtherMode;
+    GBI.cmd[G_SETPRIMDEPTH]     = RDP_SetPrimDepth;
+    GBI.cmd[G_SETSCISSOR]       = RDP_SetScissor;
+    GBI.cmd[G_SETCONVERT]       = RDP_SetConvert;
+    GBI.cmd[G_SETKEYR]          = RDP_SetKeyR;
+    GBI.cmd[G_SETKEYGB]         = RDP_SetKeyGB;
+    GBI.cmd[G_RDPFULLSYNC]      = RDP_FullSync;
+    GBI.cmd[G_RDPTILESYNC]      = RDP_TileSync;
+    GBI.cmd[G_RDPPIPESYNC]      = RDP_PipeSync;
+    GBI.cmd[G_RDPLOADSYNC]      = RDP_LoadSync;
+    GBI.cmd[G_TEXRECTFLIP]      = RDP_TexRectFlip;
+    GBI.cmd[G_TEXRECT]          = RDP_TexRect;
+
+    GBI.cmd[G_RDPNOOP]          = RDP_NoOp;
+
+    //Low Level RDP Drawing Commands:
+    GBI.cmd[G_TRI_FILL]             = RDP_TriFill;
+    GBI.cmd[G_TRI_FILL_ZBUFF]       = RDP_TriFillZBuff;
+    GBI.cmd[G_TRI_TXTR]             = RDP_TriTxtr;
+    GBI.cmd[G_TRI_TXTR_ZBUFF]       = RDP_TriTxtrZBuff;
+    GBI.cmd[G_TRI_SHADE]            = RDP_TriShade;
+    GBI.cmd[G_TRI_SHADE_TXTR]       = RDP_TriShadeTxtr;
+    GBI.cmd[G_TRI_SHADE_TXTR_ZBUFF] = RDP_TriShadeTxtrZBuff;
+
+}
+
diff --git a/source/gles2n64/src/RDP.h b/source/gles2n64/src/RDP.h
new file mode 100644 (file)
index 0000000..73e970b
--- /dev/null
@@ -0,0 +1,7 @@
+#ifndef RDP_H
+#define RDP_H
+
+void RDP_Init();
+
+#endif
+
diff --git a/source/gles2n64/src/RSP.cpp b/source/gles2n64/src/RSP.cpp
new file mode 100644 (file)
index 0000000..cb4e7d3
--- /dev/null
@@ -0,0 +1,150 @@
+#include <math.h>
+#include "Common.h"
+#include "gles2N64.h"
+#include "OpenGL.h"
+#include "Debug.h"
+#include "RSP.h"
+#include "RDP.h"
+#include "N64.h"
+#include "F3D.h"
+#include "3DMath.h"
+#include "VI.h"
+#include "ShaderCombiner.h"
+#include "DepthBuffer.h"
+#include "GBI.h"
+#include "gSP.h"
+#include "Textures.h"
+
+//#define PRINT_DISPLAYLIST
+//#define PRINT_DISPLAYLIST_NUM 1
+
+RSPInfo     RSP;
+
+void RSP_LoadMatrix( f32 mtx[4][4], u32 address )
+{
+
+    f32 recip = 1.5258789e-05f;
+
+    struct _N64Matrix
+    {
+        s16 integer[4][4];
+        u16 fraction[4][4];
+    } *n64Mat = (struct _N64Matrix *)&RDRAM[address];
+
+    for (int i = 0; i < 4; i++)
+        for (int j = 0; j < 4; j++)
+            mtx[i][j] = (GLfloat)(n64Mat->integer[i][j^1]) + (GLfloat)(n64Mat->fraction[i][j^1]) * recip;
+}
+
+void RSP_ProcessDList()
+{
+    VI_UpdateSize();
+    OGL_UpdateScale();
+    TextureCache_ActivateNoise(2);
+
+    RSP.PC[0] = *(u32*)&DMEM[0x0FF0];
+    RSP.PCi = 0;
+    RSP.count = 0;
+
+    RSP.halt = FALSE;
+    RSP.busy = TRUE;
+
+#ifdef __TRIBUFFER_OPT
+    __indexmap_clear();
+#endif
+
+    gSP.matrix.stackSize = min( 32, *(u32*)&DMEM[0x0FE4] >> 6 );
+    gSP.matrix.modelViewi = 0;
+    gSP.changed |= CHANGED_MATRIX;
+
+    for (int i = 0; i < 4; i++)
+        for (int j = 0; j < 4; j++)
+            gSP.matrix.modelView[0][i][j] = 0.0f;
+
+    gSP.matrix.modelView[0][0][0] = 1.0f;
+    gSP.matrix.modelView[0][1][1] = 1.0f;
+    gSP.matrix.modelView[0][2][2] = 1.0f;
+    gSP.matrix.modelView[0][3][3] = 1.0f;
+
+    u32 uc_start = *(u32*)&DMEM[0x0FD0];
+    u32 uc_dstart = *(u32*)&DMEM[0x0FD8];
+    u32 uc_dsize = *(u32*)&DMEM[0x0FDC];
+
+    if ((uc_start != RSP.uc_start) || (uc_dstart != RSP.uc_dstart))
+        gSPLoadUcodeEx( uc_start, uc_dstart, uc_dsize );
+
+    gDPSetAlphaCompare(G_AC_NONE);
+    gDPSetDepthSource(G_ZS_PIXEL);
+    gDPSetRenderMode(0, 0);
+    gDPSetAlphaDither(G_AD_DISABLE);
+    gDPSetColorDither(G_CD_DISABLE);
+    gDPSetCombineKey(G_CK_NONE);
+    gDPSetTextureConvert(G_TC_FILT);
+    gDPSetTextureFilter(G_TF_POINT);
+    gDPSetTextureLUT(G_TT_NONE);
+    gDPSetTextureLOD(G_TL_TILE);
+    gDPSetTextureDetail(G_TD_CLAMP);
+    gDPSetTexturePersp(G_TP_PERSP);
+    gDPSetCycleType(G_CYC_1CYCLE);
+    gDPPipelineMode(G_PM_NPRIMITIVE);
+
+#ifdef PRINT_DISPLAYLIST
+    if ((RSP.DList%PRINT_DISPLAYLIST_NUM) == 0) LOG(LOG_VERBOSE, "BEGIN DISPLAY LIST %i \n", RSP.DList);
+#endif
+
+    while (!RSP.halt)
+    {
+        u32 pc = RSP.PC[RSP.PCi];
+
+        if ((pc + 8) > RDRAMSize)
+        {
+#ifdef DEBUG
+            DebugMsg( DEBUG_LOW | DEBUG_ERROR, "ATTEMPTING TO EXECUTE RSP COMMAND AT INVALID RDRAM LOCATION\n" );
+#endif
+            break;
+        }
+
+
+        u32 w0 = *(u32*)&RDRAM[pc];
+        u32 w1 = *(u32*)&RDRAM[pc+4];
+        RSP.nextCmd = _SHIFTR( *(u32*)&RDRAM[pc+8], 24, 8 );
+        RSP.cmd = _SHIFTR( w0, 24, 8 );
+        RSP.PC[RSP.PCi] += 8;
+
+#ifdef PROFILE_GBI
+        GBI_ProfileBegin(RSP.cmd);
+#endif
+
+#ifdef PRINT_DISPLAYLIST
+        if ((RSP.DList%PRINT_DISPLAYLIST_NUM) == 0) LOG(LOG_VERBOSE, "%s: w0=0x%x w1=0x%x\n", GBI_GetFuncName(GBI.current->type, RSP.cmd), w0, w1);
+#endif
+
+        GBI.cmd[RSP.cmd]( w0, w1 );
+
+#ifdef PROFILE_GBI
+        GBI_ProfileEnd(RSP.cmd);
+#endif
+    }
+
+#ifdef PRINT_DISPLAYLIST
+        if ((RSP.DList%PRINT_DISPLAYLIST_NUM) == 0) LOG(LOG_VERBOSE, "END DISPLAY LIST %i \n", RSP.DList);
+#endif
+
+    RSP.busy = FALSE;
+    RSP.DList++;
+    gSP.changed |= CHANGED_COLORBUFFER;
+}
+
+// One-time setup of the interpreter state: fixes RDRAMSize at 8 MiB, clears
+// the display-list counter and the remembered microcode addresses, points
+// the load tile and the two texture tiles at their gDP tile slots, then
+// initialises the depth-buffer module and the GBI command table.
+void RSP_Init()
+{
+    RDRAMSize = 8 * 1024 * 1024;
+
+    RSP.DList = 0;
+    RSP.uc_dstart = 0;
+    RSP.uc_start = 0;
+
+    gSP.textureTile[0] = &gDP.tiles[0];
+    gSP.textureTile[1] = &gDP.tiles[1];
+    gDP.loadTile = &gDP.tiles[7];
+
+    DepthBuffer_Init();
+    GBI_Init();
+}
+
diff --git a/source/gles2n64/src/RSP.h b/source/gles2n64/src/RSP.h
new file mode 100644 (file)
index 0000000..33c705a
--- /dev/null
@@ -0,0 +1,34 @@
+#ifndef RSP_H
+#define RSP_H
+
+#include "N64.h"
+#include "GBI.h"
+//#include "gSP.h"
+#include "Types.h"
+
+#ifndef min
+#define min(a,b) ((a) < (b) ? (a) : (b))
+#endif
+
+#define RSPMSG_CLOSE            0
+#define RSPMSG_UPDATESCREEN     1
+#define RSPMSG_PROCESSDLIST     2
+#define RSPMSG_CAPTURESCREEN    3
+#define RSPMSG_DESTROYTEXTURES  4
+#define RSPMSG_INITTEXTURES     5
+
+typedef struct
+{
+    u32 PC[18], PCi, busy, halt, close, DList, uc_start, uc_dstart, cmd, nextCmd, count;
+} RSPInfo;
+
+extern RSPInfo RSP;
+
+/* Resolves a segmented address: bits 24..27 select an entry of gSP.segment,
+   the low 24 bits are added to it, and the sum is masked to 24 bits.
+   The argument is parenthesised so that any expression can be passed; note
+   that it is still evaluated twice. */
+#define RSP_SegmentToPhysical( segaddr ) ((gSP.segment[((segaddr) >> 24) & 0x0F] + ((segaddr) & 0x00FFFFFF)) & 0x00FFFFFF)
+
+void RSP_Init();
+void RSP_ProcessDList();
+void RSP_LoadMatrix( f32 mtx[4][4], u32 address );
+
+#endif
+
diff --git a/source/gles2n64/src/S2DEX.cpp b/source/gles2n64/src/S2DEX.cpp
new file mode 100644 (file)
index 0000000..0131016
--- /dev/null
@@ -0,0 +1,102 @@
+#include "OpenGL.h"
+#include "S2DEX.h"
+#include "F3D.h"
+#include "F3DEX.h"
+#include "GBI.h"
+#include "gSP.h"
+#include "gDP.h"
+#include "RSP.h"
+#include "Types.h"
+
+// Command handlers share the (w0, w1) signature of the two 32-bit command
+// words.  The four handlers below ignore w0 and forward w1 unchanged.
+
+// BG_1CYC: forwards w1 to gSPBgRect1Cyc.
+void S2DEX_BG_1Cyc( u32 w0, u32 w1 )
+{
+    gSPBgRect1Cyc( w1 );
+}
+
+// BG_COPY: forwards w1 to gSPBgRectCopy.
+void S2DEX_BG_Copy( u32 w0, u32 w1 )
+{
+    gSPBgRectCopy( w1 );
+}
+
+// OBJ_RECTANGLE: forwards w1 to gSPObjRectangle.
+void S2DEX_Obj_Rectangle( u32 w0, u32 w1 )
+{
+    gSPObjRectangle( w1 );
+}
+
+// OBJ_SPRITE: forwards w1 to gSPObjSprite.
+void S2DEX_Obj_Sprite( u32 w0, u32 w1 )
+{
+    gSPObjSprite( w1 );
+}
+
+// OBJ_MOVEMEM: the low 16 bits of w0 choose the target.  Zero loads the
+// full object matrix from w1, any other value loads the sub-matrix.
+void S2DEX_Obj_MoveMem( u32 w0, u32 w1 )
+{
+    if (_SHIFTR( w0, 0, 16 ) != 0)
+    {
+        gSPObjSubMatrix( w1 );
+        return;
+    }
+
+    gSPObjMatrix( w1 );
+}
+
+// SELECT_DL: accepted but ignored (empty handler).
+void S2DEX_Select_DL( u32 w0, u32 w1 )
+{
+}
+
+// OBJ_RENDERMODE: accepted but ignored (empty handler).
+void S2DEX_Obj_RenderMode( u32 w0, u32 w1 )
+{
+}
+
+// OBJ_RECTANGLE_R: accepted but ignored (empty handler).
+void S2DEX_Obj_Rectangle_R( u32 w0, u32 w1 )
+{
+}
+
+// OBJ_LOADTXTR: forwards w1 to gSPObjLoadTxtr.
+void S2DEX_Obj_LoadTxtr( u32 w0, u32 w1 )
+{
+    gSPObjLoadTxtr( w1 );
+}
+
+// OBJ_LDTX_SPRITE: forwards w1 to gSPObjLoadTxSprite.
+void S2DEX_Obj_LdTx_Sprite( u32 w0, u32 w1 )
+{
+    gSPObjLoadTxSprite( w1 );
+}
+
+// OBJ_LDTX_RECT: accepted but ignored (empty handler), unlike the _R
+// variant below.
+void S2DEX_Obj_LdTx_Rect( u32 w0, u32 w1 )
+{
+}
+
+// OBJ_LDTX_RECT_R: forwards w1 to gSPObjLoadTxRectR.
+void S2DEX_Obj_LdTx_Rect_R( u32 w0, u32 w1 )
+{
+    gSPObjLoadTxRectR( w1 );
+}
+
+// Selects the S2DEX command set: initialises the geometry-mode flags from
+// the F3DEX set, clears gSP.geometryMode, sets the PC stack size to 18 and
+// registers one handler per command.  Commands without an S2DEX-specific
+// handler reuse the F3D / F3DEX ones.
+// NOTE(review): S2DEX_RDPHALF_0 is defined in S2DEX.h but not registered
+// here - confirm that is intended.
+void S2DEX_Init()
+{
+    // Set GeometryMode flags
+    GBI_InitFlags( F3DEX );
+
+    gSP.geometryMode = 0;
+
+    GBI.PCStackSize = 18;
+
+    //          GBI Command             Command Value           Command Function
+    GBI_SetGBI( G_SPNOOP,               F3D_SPNOOP,             F3D_SPNoOp );
+    GBI_SetGBI( G_BG_1CYC,              S2DEX_BG_1CYC,          S2DEX_BG_1Cyc );
+    GBI_SetGBI( G_BG_COPY,              S2DEX_BG_COPY,          S2DEX_BG_Copy );
+    GBI_SetGBI( G_OBJ_RECTANGLE,        S2DEX_OBJ_RECTANGLE,    S2DEX_Obj_Rectangle );
+    GBI_SetGBI( G_OBJ_SPRITE,           S2DEX_OBJ_SPRITE,       S2DEX_Obj_Sprite );
+    GBI_SetGBI( G_OBJ_MOVEMEM,          S2DEX_OBJ_MOVEMEM,      S2DEX_Obj_MoveMem );
+    GBI_SetGBI( G_DL,                   F3D_DL,                 F3D_DList );
+    GBI_SetGBI( G_SELECT_DL,            S2DEX_SELECT_DL,        S2DEX_Select_DL );
+    GBI_SetGBI( G_OBJ_RENDERMODE,       S2DEX_OBJ_RENDERMODE,   S2DEX_Obj_RenderMode );
+    GBI_SetGBI( G_OBJ_RECTANGLE_R,      S2DEX_OBJ_RECTANGLE_R,  S2DEX_Obj_Rectangle_R );
+    GBI_SetGBI( G_OBJ_LOADTXTR,         S2DEX_OBJ_LOADTXTR,     S2DEX_Obj_LoadTxtr );
+    GBI_SetGBI( G_OBJ_LDTX_SPRITE,      S2DEX_OBJ_LDTX_SPRITE,  S2DEX_Obj_LdTx_Sprite );
+    GBI_SetGBI( G_OBJ_LDTX_RECT,        S2DEX_OBJ_LDTX_RECT,    S2DEX_Obj_LdTx_Rect );
+    GBI_SetGBI( G_OBJ_LDTX_RECT_R,      S2DEX_OBJ_LDTX_RECT_R,  S2DEX_Obj_LdTx_Rect_R );
+    GBI_SetGBI( G_MOVEWORD,             F3D_MOVEWORD,           F3D_MoveWord );
+    GBI_SetGBI( G_SETOTHERMODE_H,       F3D_SETOTHERMODE_H,     F3D_SetOtherMode_H );
+    GBI_SetGBI( G_SETOTHERMODE_L,       F3D_SETOTHERMODE_L,     F3D_SetOtherMode_L );
+    GBI_SetGBI( G_ENDDL,                F3D_ENDDL,              F3D_EndDL );
+    GBI_SetGBI( G_RDPHALF_1,            F3D_RDPHALF_1,          F3D_RDPHalf_1 );
+    GBI_SetGBI( G_RDPHALF_2,            F3D_RDPHALF_2,          F3D_RDPHalf_2 );
+    GBI_SetGBI( G_LOAD_UCODE,           S2DEX_LOAD_UCODE,       F3DEX_Load_uCode );
+}
+
diff --git a/source/gles2n64/src/S2DEX.h b/source/gles2n64/src/S2DEX.h
new file mode 100644 (file)
index 0000000..03ae7ba
--- /dev/null
@@ -0,0 +1,219 @@
+#ifndef S2DEX_H
+#define S2DEX_H
+
+#define G_BGLT_LOADBLOCK    0x0033
+#define G_BGLT_LOADTILE     0xfff4
+
+#define G_BG_FLAG_FLIPS     0x01
+#define G_BG_FLAG_FLIPT     0x10
+
+struct uObjScaleBg
+{
+  u16 imageW;     /* Texture width (8-byte alignment, u10.2) */
+  u16 imageX;     /* x-coordinate of upper-left 
+                  position of texture (u10.5) */ 
+  u16 frameW;     /* Transfer destination frame width (u10.2) */
+  s16 frameX;     /* x-coordinate of upper-left 
+                  position of transfer destination frame (s10.2) */
+
+  u16 imageH;     /* Texture height (u10.2) */
+  u16 imageY;     /* y-coordinate of upper-left position of 
+                  texture (u10.5) */ 
+  u16 frameH;     /* Transfer destination frame height (u10.2) */
+  s16 frameY;     /* y-coordinate of upper-left position of transfer 
+                  destination  frame (s10.2) */
+
+  u32 imagePtr;  /* Address of texture source in DRAM*/
+  u8  imageSiz;   /* Texel size
+                     G_IM_SIZ_4b (4 bits/texel)
+                     G_IM_SIZ_8b (8 bits/texel)
+                     G_IM_SIZ_16b (16 bits/texel)
+                     G_IM_SIZ_32b (32 bits/texel) */
+  u8  imageFmt;   /*Texel format
+                     G_IM_FMT_RGBA (RGBA format)
+                     G_IM_FMT_YUV (YUV format)
+                     G_IM_FMT_CI (CI format)
+                     G_IM_FMT_IA (IA format)
+                     G_IM_FMT_I (I format)  */
+  u16 imageLoad;  /* Method for loading the BG image texture
+                     G_BGLT_LOADBLOCK (use LoadBlock)
+                     G_BGLT_LOADTILE (use LoadTile) */
+  u16 imageFlip;  /* Image inversion on/off (horizontal 
+                     direction only)
+                     0 (normal display (no inversion))
+                     G_BG_FLAG_FLIPS (horizontal inversion of texture image) */
+  u16 imagePal;   /* Position of palette for 4-bit color 
+                  index texture (4-bit precision, 0~15) */
+
+  u16 scaleH;      /* y-direction scale value (u5.10) */
+  u16 scaleW;      /* x-direction scale value (u5.10) */
+  s32 imageYorig;  /* image drawing origin (s20.5)*/
+  
+  u8  padding[4];  /* Padding */
+};   /* 40 bytes */
+
+struct uObjBg
+{
+    u16 imageW;     /* Texture width (8-byte alignment, u10.2) */
+    u16 imageX;     /* x-coordinate of upper-left position of texture (u10.5) */ 
+    u16 frameW;     /* Transfer destination frame width (u10.2) */
+    s16 frameX;     /* x-coordinate of upper-left position of 
+                      transfer destination frame (s10.2) */
+    u16 imageH;     /* Texture height (u10.2) */
+    u16 imageY;     /* y-coordinate of upper-left position of 
+                      texture (u10.5) */ 
+    u16 frameH;     /* Transfer destination frame height (u10.2) */
+    s16 frameY;     /* y-coordinate of upper-left position of 
+                  transfer destination frame (s10.2) */
+
+    u32 imagePtr;  /* Address of texture source in DRAM*/
+    u8  imageSiz;   /* Texel size
+                     G_IM_SIZ_4b (4 bits/texel)
+                     G_IM_SIZ_8b (8 bits/texel)
+                     G_IM_SIZ_16b (16 bits/texel)
+                     G_IM_SIZ_32b (32 bits/texel) */
+    u8  imageFmt;   /*Texel format
+                     G_IM_FMT_RGBA (RGBA format)
+                     G_IM_FMT_YUV (YUV format)
+                     G_IM_FMT_CI (CI format)
+                     G_IM_FMT_IA (IA format)
+                     G_IM_FMT_I (I format)  */
+    u16 imageLoad;  /* Method for loading the BG image texture
+                     G_BGLT_LOADBLOCK (use LoadBlock)
+                     G_BGLT_LOADTILE (use LoadTile) */
+    u16 imageFlip;  /* Image inversion on/off (horizontal direction only)
+                     0 (normal display (no inversion))
+                     G_BG_FLAG_FLIPS (horizontal inversion of 
+                     texture image) */
+    u16 imagePal;   /* Position of palette for 4-bit color 
+                     index texture (4-bit precision, 0~15) */
+
+/* The following is set in the initialization routine guS2DInitBg */
+    u16 tmemH;      /* TMEM height for a single load (quadruple 
+                     value, s13.2) */
+    u16 tmemW;      /* TMEM width for one frame line (word size) */
+    u16 tmemLoadTH; /* TH value or Stride value */
+    u16 tmemLoadSH; /* SH value */
+    u16 tmemSize;   /* imagePtr skip value for a single load  */
+    u16 tmemSizeW;  /* imagePtr skip value for one image line */
+};      /* 40 bytes */
+
+struct uObjSprite
+{
+    u16 scaleW;      /* Width-direction scaling (u5.10) */
+    s16 objX;        /* x-coordinate of upper-left corner of OBJ (s10.2) */
+    u16 paddingX;    /* Unused (always 0) */
+    u16 imageW;      /* Texture width (length in s direction, u10.5)  */
+    u16 scaleH;      /* Height-direction scaling (u5.10) */
+    s16 objY;        /* y-coordinate of upper-left corner of OBJ (s10.2) */
+    u16 paddingY;    /* Unused (always 0) */
+    u16 imageH;      /* Texture height (length in t direction, u10.5)  */
+    u16 imageAdrs;   /* Texture starting position in TMEM (In units of 64-bit words) */
+    u16 imageStride; /* Texel wrapping width (In units of 64-bit words) */
+    u8  imageFlags;  /* Display flag
+                (*) More than one of the following flags can be specified as the bit sum of the flags: 
+                      0 (Normal display (no inversion))
+                      G_OBJ_FLAG_FLIPS (s-direction (x) inversion)
+                      G_OBJ_FLAG_FLIPT (t-direction (y) inversion)  */
+    u8  imagePal;    /* Position of palette for 4-bit color index texture  (4-bit precision, 0~7)  */
+    u8  imageSiz;    /* Texel size
+                      G_IM_SIZ_4b (4 bits/texel)
+                      G_IM_SIZ_8b (8 bits/texel)
+                      G_IM_SIZ_16b (16 bits/texel)
+                      G_IM_SIZ_32b (32 bits/texel) */
+    u8  imageFmt;    /* Texel format
+                      G_IM_FMT_RGBA (RGBA format)
+                      G_IM_FMT_YUV (YUV format)
+                      G_IM_FMT_CI (CI format)
+                      G_IM_FMT_IA (IA format)
+                      G_IM_FMT_I  (I format) */
+};    /* 24 bytes */
+
+struct uObjTxtrBlock
+{
+    u32   type;   /* Structure identifier (G_OBJLT_TXTRBLOCK) */
+    u32   image; /* Texture source address in DRAM (8-byte alignment) */
+    u16   tsize;  /* Texture size (specified by GS_TB_TSIZE) */
+    u16   tmem;   /* TMEM word address where texture will be loaded (8-byte word) */
+    u16   sid;    /* Status ID (multiple of 4: either 0, 4, 8, or 12) */
+    u16   tline;  /* Texture line width (specified by GS_TB_TLINE) */
+    u32   flag;   /* Status flag */
+    u32   mask;   /* Status mask */
+};     /* 24 bytes */
+
+struct uObjTxtrTile
+{
+    u32   type;   /* Structure identifier (G_OBJLT_TXTRTILE) */
+    u32   image; /* Texture source address in DRAM (8-byte alignment) */
+    u16   twidth; /* Texture width (specified by GS_TT_TWIDTH) */
+    u16   tmem;   /* TMEM word address where texture will be loaded (8-byte word) */
+    u16   sid;    /* Status ID (multiple of 4: either 0, 4, 8, or 12) */
+    u16   theight;/* Texture height (specified by GS_TT_THEIGHT) */
+    u32   flag;   /* Status flag */
+    u32   mask;   /* Status mask  */
+};      /* 24 bytes */
+
+struct uObjTxtrTLUT
+{
+    u32   type;   /* Structure identifier (G_OBJLT_TLUT) */
+    u32   image; /* Texture source address in DRAM */
+    u16   pnum;   /* Number of palettes to load - 1 */
+    u16   phead;  /* Palette position at start of load (256~511) */
+    u16   sid;    /* Status ID (multiple of 4: either 0, 4, 8, or 12) */
+    u16   zero;   /* Always assign 0 */
+    u32   flag;   /* Status flag */
+    u32   mask;   /* Status mask */
+};      /* 24 bytes */
+
+/* One texture-load descriptor, viewed as a block, tile or TLUT load.
+   All three variants start with the `type` and `image` fields. */
+typedef union 
+{
+    uObjTxtrBlock      block;
+    uObjTxtrTile       tile;
+    uObjTxtrTLUT       tlut;
+} uObjTxtr;
+
+/* A texture-load descriptor immediately followed by the sprite to draw. */
+struct uObjTxSprite 
+{
+    uObjTxtr      txtr;
+    uObjSprite    sprite;
+};
+
+/* 2D object matrix: four fixed-point coefficients, a translation and a
+   base scale.  Fixed-point formats are given per field.
+   NOTE(review): Y is declared before X and BaseScaleY before BaseScaleX;
+   presumably this mirrors the 16-bit swapped layout of the data in RDRAM -
+   confirm before reordering. */
+struct uObjMtx
+{
+  s32 A, B, C, D;   /* s15.16 */
+  s16 Y, X;         /* s10.2 */
+  u16 BaseScaleY;   /* u5.10 */
+  u16 BaseScaleX;   /* u5.10 */
+};
+
+void S2DEX_BG_1Cyc( u32 w0, u32 w1 );
+void S2DEX_BG_Copy( u32 w0, u32 w1 );
+void S2DEX_Obj_Rectangle( u32 w0, u32 w1 );
+void S2DEX_Obj_Sprite( u32 w0, u32 w1 );
+void S2DEX_Obj_MoveMem( u32 w0, u32 w1 );
+void S2DEX_Select_DL( u32 w0, u32 w1 );
+void S2DEX_Obj_RenderMode( u32 w0, u32 w1 );
+void S2DEX_Obj_Rectangle_R( u32 w0, u32 w1 );
+void S2DEX_Obj_LoadTxtr( u32 w0, u32 w1 );
+void S2DEX_Obj_LdTx_Sprite( u32 w0, u32 w1 );
+void S2DEX_Obj_LdTx_Rect( u32 w0, u32 w1 );
+void S2DEX_Obj_LdTx_Rect_R( u32 w0, u32 w1 );
+void S2DEX_Init();
+
+#define S2DEX_BG_1CYC           0x01
+#define S2DEX_BG_COPY           0x02
+#define S2DEX_OBJ_RECTANGLE     0x03
+#define S2DEX_OBJ_SPRITE        0x04
+#define S2DEX_OBJ_MOVEMEM       0x05
+#define S2DEX_LOAD_UCODE        0xAF
+#define S2DEX_SELECT_DL         0xB0
+#define S2DEX_OBJ_RENDERMODE    0xB1
+#define S2DEX_OBJ_RECTANGLE_R   0xB2
+#define S2DEX_OBJ_LOADTXTR      0xC1
+#define S2DEX_OBJ_LDTX_SPRITE   0xC2
+#define S2DEX_OBJ_LDTX_RECT     0xC3
+#define S2DEX_OBJ_LDTX_RECT_R   0xC4
+#define S2DEX_RDPHALF_0         0xE4
+
+#endif
+
diff --git a/source/gles2n64/src/S2DEX2.cpp b/source/gles2n64/src/S2DEX2.cpp
new file mode 100644 (file)
index 0000000..18f0850
--- /dev/null
@@ -0,0 +1,45 @@
+#include "OpenGL.h"
+#include "S2DEX.h"
+#include "S2DEX2.h"
+#include "F3D.h"
+#include "F3DEX.h"
+#include "F3DEX2.h"
+#include "GBI.h"
+#include "gSP.h" 
+#include "gDP.h"
+#include "RSP.h"
+#include "Types.h"
+
+// Selects the S2DEX2 command set: initialises the geometry-mode flags from
+// the F3DEX2 set, clears gSP.geometryMode, sets the PC stack size to 18 and
+// registers one handler per command.  The S2DEX handler functions are
+// reused under the S2DEX2 command values; the remaining commands use the
+// F3DEX2 command values with F3D / F3DEX / F3DEX2 handlers.
+void S2DEX2_Init()
+{
+    // Set GeometryMode flags
+    GBI_InitFlags( F3DEX2 );
+
+    gSP.geometryMode = 0;
+
+    GBI.PCStackSize = 18;
+
+    //          GBI Command             Command Value           Command Function
+    GBI_SetGBI( G_SPNOOP,               F3DEX2_SPNOOP,          F3D_SPNoOp );
+    GBI_SetGBI( G_BG_1CYC,              S2DEX2_BG_1CYC,         S2DEX_BG_1Cyc );
+    GBI_SetGBI( G_BG_COPY,              S2DEX2_BG_COPY,         S2DEX_BG_Copy );
+    GBI_SetGBI( G_OBJ_RECTANGLE,        S2DEX2_OBJ_RECTANGLE,   S2DEX_Obj_Rectangle );
+    GBI_SetGBI( G_OBJ_SPRITE,           S2DEX2_OBJ_SPRITE,      S2DEX_Obj_Sprite );
+    GBI_SetGBI( G_OBJ_MOVEMEM,          S2DEX2_OBJ_MOVEMEM,     S2DEX_Obj_MoveMem );
+    GBI_SetGBI( G_DL,                   F3DEX2_DL,              F3D_DList );
+    GBI_SetGBI( G_SELECT_DL,            S2DEX2_SELECT_DL,       S2DEX_Select_DL );
+    GBI_SetGBI( G_OBJ_RENDERMODE,       S2DEX2_OBJ_RENDERMODE,  S2DEX_Obj_RenderMode );
+    GBI_SetGBI( G_OBJ_RECTANGLE_R,      S2DEX2_OBJ_RECTANGLE_R, S2DEX_Obj_Rectangle_R );
+    GBI_SetGBI( G_OBJ_LOADTXTR,         S2DEX2_OBJ_LOADTXTR,    S2DEX_Obj_LoadTxtr );
+    GBI_SetGBI( G_OBJ_LDTX_SPRITE,      S2DEX2_OBJ_LDTX_SPRITE, S2DEX_Obj_LdTx_Sprite );
+    GBI_SetGBI( G_OBJ_LDTX_RECT,        S2DEX2_OBJ_LDTX_RECT,   S2DEX_Obj_LdTx_Rect );
+    GBI_SetGBI( G_OBJ_LDTX_RECT_R,      S2DEX2_OBJ_LDTX_RECT_R, S2DEX_Obj_LdTx_Rect_R );
+    GBI_SetGBI( G_MOVEWORD,             F3DEX2_MOVEWORD,        F3DEX2_MoveWord );
+    GBI_SetGBI( G_SETOTHERMODE_H,       F3DEX2_SETOTHERMODE_H,  F3DEX2_SetOtherMode_H );
+    GBI_SetGBI( G_SETOTHERMODE_L,       F3DEX2_SETOTHERMODE_L,  F3DEX2_SetOtherMode_L );
+    GBI_SetGBI( G_ENDDL,                F3DEX2_ENDDL,           F3D_EndDL );
+    GBI_SetGBI( G_RDPHALF_1,            F3DEX2_RDPHALF_1,       F3D_RDPHalf_1 );
+    GBI_SetGBI( G_RDPHALF_2,            F3DEX2_RDPHALF_2,       F3D_RDPHalf_2 );
+    GBI_SetGBI( G_LOAD_UCODE,           F3DEX2_LOAD_UCODE,      F3DEX_Load_uCode );
+}
+
diff --git a/source/gles2n64/src/S2DEX2.h b/source/gles2n64/src/S2DEX2.h
new file mode 100644 (file)
index 0000000..cbe98d7
--- /dev/null
@@ -0,0 +1,20 @@
+#ifndef S2DEX2_H
+#define S2DEX2_H
+
+void S2DEX2_Init();
+
+#define S2DEX2_OBJ_RECTANGLE_R  0xDA
+#define S2DEX2_OBJ_MOVEMEM      0xDC
+#define S2DEX2_RDPHALF_0        0xE4
+#define S2DEX2_OBJ_RECTANGLE    0x01
+#define S2DEX2_OBJ_SPRITE       0x02
+#define S2DEX2_SELECT_DL        0x04
+#define S2DEX2_OBJ_LOADTXTR     0x05
+#define S2DEX2_OBJ_LDTX_SPRITE  0x06
+#define S2DEX2_OBJ_LDTX_RECT    0x07
+#define S2DEX2_OBJ_LDTX_RECT_R  0x08
+#define S2DEX2_BG_1CYC          0x09
+#define S2DEX2_BG_COPY          0x0A
+#define S2DEX2_OBJ_RENDERMODE   0x0B
+#endif
+
diff --git a/source/gles2n64/src/ShaderCombiner.cpp b/source/gles2n64/src/ShaderCombiner.cpp
new file mode 100755 (executable)
index 0000000..86ffa51
--- /dev/null
@@ -0,0 +1,844 @@
+
+#include <stdlib.h>
+#include "OpenGL.h"
+#include "ShaderCombiner.h"
+#include "Common.h"
+#include "Textures.h"
+#include "Config.h"
+
+
+// Combiner equation: (sa - sb) * m + a
+//
+// The tables below translate the raw selector value of each combiner input
+// into one of the combiner source constants.  There is one table per input
+// (sa, sb, m, a) for the colour part and one per input for the alpha part.
+// Each table is indexed directly by the selector field, so its length
+// covers every value the field can take: 16 entries for sa/sb RGB, 32 for
+// m RGB and 8 for the others.  Unused selector values map to ZERO.
+
+// Colour input "sa".
+static const u32 saRGBExpanded[] =
+{
+    COMBINED,           TEXEL0,             TEXEL1,             PRIMITIVE,
+    SHADE,              ENVIRONMENT,        ONE,                NOISE,
+    ZERO,               ZERO,               ZERO,               ZERO,
+    ZERO,               ZERO,               ZERO,               ZERO
+};
+
+// Colour input "sb".
+static const u32 sbRGBExpanded[] =
+{
+    COMBINED,           TEXEL0,             TEXEL1,             PRIMITIVE,
+    SHADE,              ENVIRONMENT,        CENTER,             K4,
+    ZERO,               ZERO,               ZERO,               ZERO,
+    ZERO,               ZERO,               ZERO,               ZERO
+};
+
+// Colour input "m"; this is the only input that can select alpha channels
+// and the LOD / K5 values as a colour multiplier.
+static const u32 mRGBExpanded[] =
+{
+    COMBINED,           TEXEL0,             TEXEL1,             PRIMITIVE,
+    SHADE,              ENVIRONMENT,        SCALE,              COMBINED_ALPHA,
+    TEXEL0_ALPHA,       TEXEL1_ALPHA,       PRIMITIVE_ALPHA,    SHADE_ALPHA,
+    ENV_ALPHA,          LOD_FRACTION,       PRIM_LOD_FRAC,      K5,
+    ZERO,               ZERO,               ZERO,               ZERO,
+    ZERO,               ZERO,               ZERO,               ZERO,
+    ZERO,               ZERO,               ZERO,               ZERO,
+    ZERO,               ZERO,               ZERO,               ZERO
+};
+
+// Colour input "a".
+static const u32 aRGBExpanded[] =
+{
+    COMBINED,           TEXEL0,             TEXEL1,             PRIMITIVE,
+    SHADE,              ENVIRONMENT,        ONE,                ZERO
+};
+
+// Alpha input "sa".
+static const u32 saAExpanded[] =
+{
+    COMBINED,           TEXEL0_ALPHA,       TEXEL1_ALPHA,       PRIMITIVE_ALPHA,
+    SHADE_ALPHA,        ENV_ALPHA,          ONE,                ZERO
+};
+
+// Alpha input "sb".
+static const u32 sbAExpanded[] =
+{
+    COMBINED,           TEXEL0_ALPHA,       TEXEL1_ALPHA,       PRIMITIVE_ALPHA,
+    SHADE_ALPHA,        ENV_ALPHA,          ONE,                ZERO
+};
+
+// Alpha input "m"; entry 0 is LOD_FRACTION instead of COMBINED.
+static const u32 mAExpanded[] =
+{
+    LOD_FRACTION,       TEXEL0_ALPHA,       TEXEL1_ALPHA,       PRIMITIVE_ALPHA,
+    SHADE_ALPHA,        ENV_ALPHA,          PRIM_LOD_FRAC,      ZERO,
+};
+
+// Alpha input "a".
+static const u32 aAExpanded[] =
+{
+    COMBINED,           TEXEL0_ALPHA,       TEXEL1_ALPHA,       PRIMITIVE_ALPHA,
+    SHADE_ALPHA,        ENV_ALPHA,          ONE,                ZERO
+};
+
+// Eight lookup tables of 21 entries each.
+// NOTE(review): presumably the reverse of the *Expanded tables - indexed by
+// a combiner source constant and yielding the raw selector value for the
+// colour (CC) or alpha (AC) input A/B/C/D, with 15 / 31 / 7 standing for
+// "not selectable".  Their users are outside this section - confirm.
+int CCEncodeA[] = {0, 1, 2, 3, 4, 5, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 7, 15, 15, 6, 15 };
+int CCEncodeB[] = {0, 1, 2, 3, 4, 5, 6, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 7, 15, 15, 15 };
+int CCEncodeC[] = {0, 1, 2, 3, 4, 5, 31, 6, 7, 8, 9, 10, 11, 12, 13, 14, 31, 31, 15, 31, 31};
+int CCEncodeD[] = {0, 1, 2, 3, 4, 5, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 6, 15};
+int ACEncodeA[] = {7, 7, 7, 7, 7, 7, 7, 7, 0, 1, 2, 3, 4, 5, 7, 7, 7, 7, 7, 6, 7};
+int ACEncodeB[] = {7, 7, 7, 7, 7, 7, 7, 7, 0, 1, 2, 3, 4, 5, 7, 7, 7, 7, 7, 6, 7};
+int ACEncodeC[] = {7, 7, 7, 7, 7, 7, 7, 7, 0, 1, 2, 3, 4, 5, 7, 6, 7, 7, 7, 7, 7};
+int ACEncodeD[] = {7, 7, 7, 7, 7, 7, 7, 7, 0, 1, 2, 3, 4, 5, 7, 7, 7, 7, 7, 6, 7};
+
+ShaderProgram *scProgramRoot = NULL;
+ShaderProgram *scProgramCurrent = NULL;
+int scProgramChanged = 0;
+int scProgramCount = 0;
+
+GLint _vertex_shader = 0;
+
+// Common beginning of every generated fragment shader: declares the
+// samplers, colour/constant uniforms and varyings, opens main() and
+// declares the working colour lFragColor.  main() is deliberately left
+// open; the rest of the body has to be appended by the shader generator.
+const char *_frag_header = "                                \n"\
+"uniform sampler2D uTex0;                                   \n"\
+"uniform sampler2D uTex1;                                   \n"\
+"uniform sampler2D uNoise;                                  \n"\
+"uniform lowp vec4 uEnvColor;                               \n"\
+"uniform lowp vec4 uPrimColor;                              \n"\
+"uniform lowp vec4 uFogColor;                               \n"\
+"uniform highp float uAlphaRef;                             \n"\
+"uniform lowp float uPrimLODFrac;                           \n"\
+"uniform lowp float uK4;                                    \n"\
+"uniform lowp float uK5;                                    \n"\
+"                                                           \n"\
+"varying lowp float vFactor;                                \n"\
+"varying lowp vec4 vShadeColor;                             \n"\
+"varying mediump vec2 vTexCoord0;                           \n"\
+"varying mediump vec2 vTexCoord1;                           \n"\
+"                                                           \n"\
+"void main()                                                \n"\
+"{                                                          \n"\
+"lowp vec4 lFragColor;                                      \n";
+
+
+// Main part of the vertex shader.  Passes aPosition and aColor through
+// unchanged.  When uRenderState is 1.0 both texture coordinates are derived
+// from aTexCoord0 using the scale/offset uniforms (index 0 for vTexCoord0,
+// index 1 for vTexCoord1); otherwise aTexCoord0 / aTexCoord1 are copied.
+// main() is left open so that further snippets can be appended.
+// NOTE(review): uTexScale is declared as a single vec2, so uTexScale[0] and
+// uTexScale[1] select its components rather than array elements - confirm
+// this is the intended per-axis vs. per-tile behaviour.
+const char *_vert = "                                       \n"\
+"attribute highp vec4  aPosition;                          \n"\
+"attribute lowp vec4   aColor;                             \n"\
+"attribute highp vec2   aTexCoord0;                         \n"\
+"attribute highp vec2   aTexCoord1;                         \n"\
+"                                                           \n"\
+"uniform bool              uEnableFog;                         \n"\
+"uniform float                 uFogMultiplier, uFogOffset;         \n"\
+"uniform float                         uRenderState;                       \n"\
+"                                                           \n"\
+"uniform mediump vec2  uTexScale;                          \n"\
+"uniform mediump vec2  uTexOffset[2];                      \n"\
+"uniform mediump vec2  uCacheShiftScale[2];                \n"\
+"uniform mediump vec2  uCacheScale[2];                     \n"\
+"uniform mediump vec2  uCacheOffset[2];                    \n"\
+"                                                           \n"\
+"varying lowp float     vFactor;                            \n"\
+"varying lowp vec4             vShadeColor;                        \n"\
+"varying mediump vec2  vTexCoord0;                         \n"\
+"varying mediump vec2  vTexCoord1;                         \n"\
+"                                                           \n"\
+"void main()                                                \n"\
+"{                                                          \n"\
+"gl_Position = aPosition;                                   \n"\
+"vShadeColor = aColor;                                      \n"\
+"                                                           \n"\
+"if (uRenderState == 1.0)                                   \n"\
+"{                                                          \n"\
+"vTexCoord0 = (aTexCoord0 * (uTexScale[0] *                 \n"\
+"           uCacheShiftScale[0]) + (uCacheOffset[0] -       \n"\
+"           uTexOffset[0])) * uCacheScale[0];               \n"\
+"vTexCoord1 = (aTexCoord0 * (uTexScale[1] *                 \n"\
+"           uCacheShiftScale[1]) + (uCacheOffset[1] -       \n"\
+"           uTexOffset[1])) * uCacheScale[1];               \n"\
+"}                                                          \n"\
+"else                                                       \n"\
+"{                                                          \n"\
+"vTexCoord0 = aTexCoord0;                                   \n"\
+"vTexCoord1 = aTexCoord1;                                   \n"\
+"}                                                          \n"\
+"                                                           \n";
+
+// Vertex-shader snippet for fog: when uEnableFog is set, vFactor becomes
+// z/w (floored at -1.0) times uFogMultiplier plus uFogOffset, clamped to
+// the range 0..1.
+const char * _vertfog = "                                   \n"\
+"if (uEnableFog)                                            \n"\
+"{                                                          \n"\
+"vFactor = max(-1.0, aPosition.z / aPosition.w)             \n"\
+"   * uFogMultiplier + uFogOffset;                          \n"\
+"vFactor = clamp(vFactor, 0.0, 1.0);                        \n"\
+"}                                                          \n";
+
+// Vertex-shader snippet that, when uRenderState is 1.0, remaps the output
+// depth to (z + 9*w) / 10, compressing it towards the far end of the range.
+const char * _vertzhack = "                                 \n"\
+"if (uRenderState == 1.0)                                   \n"\
+"{                                                          \n"\
+"gl_Position.z = (gl_Position.z + gl_Position.w*9.0) * 0.1; \n"\
+"}                                                          \n";
+
+
+const char * _color_param_str(int param)
+{
+    switch(param)
+    {
+        case COMBINED:          return "lFragColor.rgb";
+        case TEXEL0:            return "lTex0.rgb";
+        case TEXEL1:            return "lTex1.rgb";
+        case PRIMITIVE:         return "uPrimColor.rgb";
+        case SHADE:             return "vShadeColor.rgb";
+        case ENVIRONMENT:       return "uEnvColor.rgb";
+        case CENTER:            return "vec3(0.0)";
+        case SCALE:             return "vec3(0.0)";
+        case COMBINED_ALPHA:    return "vec3(lFragColor.a)";
+        case TEXEL0_ALPHA:      return "vec3(lTex0.a)";
+        case TEXEL1_ALPHA:      return "vec3(lTex1.a)";
+        case PRIMITIVE_ALPHA:   return "vec3(uPrimColor.a)";
+        case SHADE_ALPHA:       return "vec3(vShadeColor.a)";
+        case ENV_ALPHA:         return "vec3(uEnvColor.a)";
+        case LOD_FRACTION:      return "vec3(0.0)";
+        case PRIM_LOD_FRAC:     return "vec3(uPrimLODFrac)";
+        case NOISE:             return "lNoise.rgb";
+        case K4:                return "vec3(uK4)";
+        case K5:                return "vec3(uK5)";
+        case ONE:               return "vec3(1.0)";
+        case ZERO:              return "vec3(0.0)";
+        default:
+            return "vec3(0.0)";
+    }
+}
+
+const char * _alpha_param_str(int param)
+{
+    switch(param)
+    {
+        case COMBINED:          return "lFragColor.a";
+        case TEXEL0:            return "lTex0.a";
+        case TEXEL1:            return "lTex1.a";
+        case PRIMITIVE:         return "uPrimColor.a";
+        case SHADE:             return "vShadeColor.a";
+        case ENVIRONMENT:       return "uEnvColor.a";
+        case CENTER:            return "0.0";
+        case SCALE:             return "0.0";
+        case COMBINED_ALPHA:    return "lFragColor.a";
+        case TEXEL0_ALPHA:      return "lTex0.a";
+        case TEXEL1_ALPHA:      return "lTex1.a";
+        case PRIMITIVE_ALPHA:   return "uPrimColor.a";
+        case SHADE_ALPHA:       return "vShadeColor.a";
+        case ENV_ALPHA:         return "uEnvColor.a";
+        case LOD_FRACTION:      return "0.0";
+        case PRIM_LOD_FRAC:     return "uPrimLODFrac";
+        case NOISE:             return "lNoise.a";
+        case K4:                return "uK4";
+        case K5:                return "uK5";
+        case ONE:               return "1.0";
+        case ZERO:              return "0.0";
+        default:
+            return "0.0";
+    }
+}
+
+// Parallel arrays with MAX_CACHE slots each, plus a running counter.
+// NOTE(review): presumably a small most-recently-used cache that maps a
+// combiner mux value and flags to an already built ShaderProgram, with
+// old_cache / cache_turn used to pick the slot to replace.  The code that
+// uses them is outside this section - confirm.
+#define MAX_CACHE      16
+ShaderProgram* prog_cache[MAX_CACHE];
+u64                            mux_cache[MAX_CACHE];
+int                            flag_cache[MAX_CACHE];
+int                            old_cache[MAX_CACHE];
+static int             cache_turn=0;
+
+// Decodes a raw 64-bit combine mux into the decode[4][4] table and simplifies it.
+//
+// Row layout of decode[][]:  0 = cycle 0 rgb, 1 = cycle 0 alpha,
+//                            2 = cycle 1 rgb, 3 = cycle 1 alpha.
+// Column layout:             0..3 hold the a, b, m(ultiplier), a(dd) inputs.
+//
+// mux     raw combiner word, stored in combine.mux
+// cycle2  when true, rows 2 and 3 are decoded too and the SC_IGNORE_* flags
+//         are computed; when false those rows stay ZERO and flags stays 0.
+//
+// The simplification passes below run in a fixed order and each one depends
+// on the table state left by the previous ones.
+DecodedMux::DecodedMux(u64 mux, bool cycle2)
+{
+    combine.mux = mux;
+    flags = 0;
+
+    //set to ZERO.
+    for(int i=0;i<4;i++)
+        for(int j=0; j< 4; j++)
+            decode[i][j] = ZERO;
+
+    //rgb (row 0) and alpha (row 1) of cycle 0
+    decode[0][0] = saRGBExpanded[combine.saRGB0];
+    decode[0][1] = sbRGBExpanded[combine.sbRGB0];
+    decode[0][2] = mRGBExpanded[combine.mRGB0];
+    decode[0][3] = aRGBExpanded[combine.aRGB0];
+    decode[1][0] = saAExpanded[combine.saA0];
+    decode[1][1] = sbAExpanded[combine.sbA0];
+    decode[1][2] = mAExpanded[combine.mA0];
+    decode[1][3] = aAExpanded[combine.aA0];
+    if (cycle2)
+    {
+        //rgb (row 2) and alpha (row 3) of cycle 1
+        decode[2][0] = saRGBExpanded[combine.saRGB1];
+        decode[2][1] = sbRGBExpanded[combine.sbRGB1];
+        decode[2][2] = mRGBExpanded[combine.mRGB1];
+        decode[2][3] = aRGBExpanded[combine.aRGB1];
+        decode[3][0] = saAExpanded[combine.saA1];
+        decode[3][1] = sbAExpanded[combine.sbA1];
+        decode[3][2] = mAExpanded[combine.mA1];
+        decode[3][3] = aAExpanded[combine.aA1];
+
+        //texel 0/1 are swapped in 2nd cycle.
+        //(a non-zero cycle argument makes swap() operate on rows 2 and 3)
+        swap(1, TEXEL0, TEXEL1);
+        swap(1, TEXEL0_ALPHA, TEXEL1_ALPHA);
+    }
+
+    //simplifying mux:
+    //inputs replaced by ZERO here are dropped from the generated shader.
+    //NOTE(review): replace() selects rows 0-1 only when its cycle argument is 0,
+    //so this presumably relies on G_CYC_1CYCLE == 0 - confirm.
+    if (replace(G_CYC_1CYCLE, LOD_FRACTION, ZERO) || replace(G_CYC_2CYCLE, LOD_FRACTION, ZERO))
+        LOG(LOG_VERBOSE, "SC Replacing LOD_FRACTION with ZERO\n");
+#if 1
+    if (replace(G_CYC_1CYCLE, K4, ZERO) || replace(G_CYC_2CYCLE, K4, ZERO))
+        LOG(LOG_VERBOSE, "SC Replacing K4 with ZERO\n");
+
+    if (replace(G_CYC_1CYCLE, K5, ZERO) || replace(G_CYC_2CYCLE, K5, ZERO))
+        LOG(LOG_VERBOSE, "SC Replacing K5 with ZERO\n");
+#endif
+
+    if (replace(G_CYC_1CYCLE, CENTER, ZERO) || replace(G_CYC_2CYCLE, CENTER, ZERO))
+        LOG(LOG_VERBOSE, "SC Replacing CENTER with ZERO\n");
+
+    if (replace(G_CYC_1CYCLE, SCALE, ZERO) || replace(G_CYC_2CYCLE, SCALE, ZERO))
+        LOG(LOG_VERBOSE, "SC Replacing SCALE with ZERO\n");
+
+    //Combiner has initial value of zero in cycle 0
+    if (replace(G_CYC_1CYCLE, COMBINED, ZERO))
+        LOG(LOG_VERBOSE, "SC Setting CYCLE1 COMBINED to ZERO\n");
+
+    if (replace(G_CYC_1CYCLE, COMBINED_ALPHA, ZERO))
+        LOG(LOG_VERBOSE, "SC Setting CYCLE1 COMBINED_ALPHA to ZERO\n");
+
+    //noise is only kept when the user enabled it
+    if (!config.enableNoise)
+    {
+        if (replace(G_CYC_1CYCLE, NOISE, ZERO))
+            LOG(LOG_VERBOSE, "SC Setting CYCLE1 NOISE to ZERO\n");
+
+        if (replace(G_CYC_2CYCLE, NOISE, ZERO))
+            LOG(LOG_VERBOSE, "SC Setting CYCLE2 NOISE to ZERO\n");
+
+    }
+
+    //multiplying by zero: (A-B)*0 + C = C
+    //so both subtraction operands can be cleared as well
+    for(int i=0 ; i<4; i++)
+    {
+        if (decode[i][2] == ZERO)
+        {
+            decode[i][0] = ZERO;
+            decode[i][1] = ZERO;
+        }
+    }
+
+    //(A1-B1)*C1 + D1
+    //(A2-B2)*C2 + D2
+    //1. ((A1-B1)*C1 + D1 - B2)*C2 + D2 = A1*C1*C2 - B1*C1*C2 + D1*C2 - B2*C2 + D2
+    //2. (A2 - (A1-B1)*C1 - D1)*C2 + D2 = A2*C2 - A1*C1*C2 + B1*C1*C2 - D1*C2 + D2
+    //3. (A2 - B2)*((A1-B1)*C1 + D1) + D2 = A2*A1*C1 - A2*B1*C1 + A2*D1 - B2*A1*C1 + B2*B1*C1 - B2*D1 + D2
+    //4. (A2-B2)*C2 + (A1-B1)*C1 + D1 = A2*C2 - B2*C2 + A1*C1 - B1*C1 + D1
+
+    if (cycle2)
+    {
+
+        //cycle 0 rgb is dead code when cycle 1 rgb never reads COMBINED
+        if (!find(2, COMBINED))
+            flags |= SC_IGNORE_RGB0;
+
+        //cycle 0 alpha is dead code when nothing in cycle 1 reads the combined alpha
+        if (!(find(2, COMBINED_ALPHA) || find(3, COMBINED_ALPHA) || find(3, COMBINED)))
+            flags |= SC_IGNORE_ALPHA0;
+
+        //cycle 1 rgb is a pure pass-through: (0 - 0) * 0 + COMBINED
+        if (decode[2][0] == ZERO && decode[2][1] == ZERO && decode[2][2] == ZERO && decode[2][3] == COMBINED)
+        {
+            flags |= SC_IGNORE_RGB1;
+        }
+
+        //cycle 1 alpha is a pure pass-through of the combined alpha
+        if (decode[3][0] == ZERO && decode[3][1] == ZERO && decode[3][2] == ZERO &&
+            (decode[3][3] == COMBINED_ALPHA || decode[3][3] == COMBINED))
+        {
+            flags |= SC_IGNORE_ALPHA1;
+        }
+
+    }
+}
+
+// Reports whether any of the four inputs stored in decode row `index`
+// equals `src`.
+bool DecodedMux::find(int index, int src)
+{
+    const int *row = decode[index];
+    return row[0] == src || row[1] == src || row[2] == src || row[3] == src;
+}
+
+bool DecodedMux::replace(int cycle, int src, int dest)
+{
+    int r = false;
+    for(int i=0;i<2;i++)
+    {
+        int ii = (cycle == 0) ? i : (2+i);
+        for(int j=0;j<4;j++)
+        {
+            if (decode[ii][j] == src) {decode[ii][j] = dest; r=true;}
+        }
+    }
+    return r;
+}
+
+bool DecodedMux::swap(int cycle, int src0, int src1)
+{
+    int r = false;
+    for(int i=0;i<2;i++)
+    {
+        int ii = (cycle == 0) ? i : (2+i);
+        for(int j=0;j<4;j++)
+        {
+            if (decode[ii][j] == src0) {decode[ii][j] = src1; r=true;}
+            else if (decode[ii][j] == src1) {decode[ii][j] = src0; r=true;}
+        }
+    }
+    return r;
+}
+
+void DecodedMux::hack()
+{
+    if (config.hackZelda)
+    {
+        if(combine.mux == 0xfffd923800ffadffLL)
+        {
+            replace(G_CYC_1CYCLE, TEXEL1, TEXEL0);
+            replace(G_CYC_2CYCLE, TEXEL1, TEXEL0);
+        }
+        else if (combine.mux == 0xff5bfff800121603LL)
+        {
+            replace(G_CYC_1CYCLE, TEXEL1, ZERO);
+            replace(G_CYC_2CYCLE, TEXEL1, ZERO);
+        }
+    }
+
+}
+
+
+// Returns non-zero when `prog` was built for the same raw mux and flags as
+// requested. A NULL `prog` also yields 1, which lets the tree walk in
+// ShaderCombiner_Set() stop at an empty child.
+int _program_compare(ShaderProgram *prog, DecodedMux *dmux, u32 flags)
+{
+    if (!prog)
+        return 1;
+
+    const bool sameMux   = (prog->combine.mux == dmux->combine.mux);
+    const bool sameFlags = (prog->flags == flags);
+    return sameMux && sameFlags;
+}
+
+// Logs the info log of a shader object that failed to compile.
+//
+// shader  GL shader object name whose info log is fetched.
+//
+// The length is initialised before the query so that a failed query cannot
+// leave it indeterminate, the allocation is checked, and the full buffer
+// size (including room for the terminator) is passed to GL.
+void _glcompiler_error(GLint shader)
+{
+    GLint len = 0;
+    GLsizei written = 0;
+
+    glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &len);
+    if (len < 0)
+        len = 0;
+
+    char *log = (char*) malloc(len + 1);
+    if (!log)
+    {
+        LOG(LOG_ERROR, "COMPILE ERROR: (out of memory while reading info log)\n");
+        return;
+    }
+
+    log[0] = 0;     // stays an empty string if GL writes nothing
+    glGetShaderInfoLog(shader, len + 1, &written, log);
+    log[len] = 0;
+    LOG(LOG_ERROR, "COMPILE ERROR: %s \n", log);
+    free(log);
+}
+
+// Logs the info log of a program object that failed to link.
+//
+// program  GL program object name whose info log is fetched.
+//
+// The length is initialised before the query so that a failed query cannot
+// leave it indeterminate, the allocation is checked, and the full buffer
+// size (including room for the terminator) is passed to GL.
+void _gllinker_error(GLint program)
+{
+    GLint len = 0;
+    GLsizei written = 0;
+
+    glGetProgramiv(program, GL_INFO_LOG_LENGTH, &len);
+    if (len < 0)
+        len = 0;
+
+    char *log = (char*) malloc(len + 1);
+    if (!log)
+    {
+        LOG(LOG_ERROR, "LINK ERROR: (out of memory while reading info log)\n");
+        return;
+    }
+
+    log[0] = 0;     // stays an empty string if GL writes nothing
+    glGetProgramInfoLog(program, len + 1, &written, log);
+    log[len] = 0;
+    LOG(LOG_ERROR, "LINK ERROR: %s \n", log);
+    free(log);
+}
+
+// Binds the vertex attribute names used by the shaders to their fixed
+// SC_* indices on program `p`.
+void _locate_attributes(ShaderProgram *p)
+{
+    static const struct { int index; const char *name; } bindings[] =
+    {
+        { SC_POSITION,  "aPosition"  },
+        { SC_COLOR,     "aColor"     },
+        { SC_TEXCOORD0, "aTexCoord0" },
+        { SC_TEXCOORD1, "aTexCoord1" },
+    };
+
+    for (unsigned k = 0; k < sizeof(bindings) / sizeof(bindings[0]); k++)
+        glBindAttribLocation(p->program, bindings[k].index, bindings[k].name);
+}
+
+// Looks up uniform A on program p and stores its location in
+// p->uniforms.A.loc. The macro argument is stringified, so it must be spelled
+// exactly like the uniform name in the shader source (including any "[n]").
+// Expects a ShaderProgram pointer named `p` in the enclosing scope.
+#define LocateUniform(A) \
+    p->uniforms.A.loc = glGetUniformLocation(p->program, #A);
+
+// Resolves the location of every uniform tracked in UniformLocation for
+// program `p`.
+void _locate_uniforms(ShaderProgram *p)
+{
+    LocateUniform(uTex0);
+    LocateUniform(uTex1);
+    LocateUniform(uNoise);
+    LocateUniform(uEnvColor);
+    LocateUniform(uPrimColor);
+    LocateUniform(uPrimLODFrac);
+    LocateUniform(uK4);
+    LocateUniform(uK5);
+    LocateUniform(uFogColor);
+    LocateUniform(uEnableFog);
+    LocateUniform(uRenderState);
+    LocateUniform(uFogMultiplier);
+    LocateUniform(uFogOffset);
+    LocateUniform(uAlphaRef);
+    LocateUniform(uTexScale);
+    // array uniforms are located element by element
+    LocateUniform(uTexOffset[0]);
+    LocateUniform(uTexOffset[1]);
+    LocateUniform(uCacheShiftScale[0]);
+    LocateUniform(uCacheShiftScale[1]);
+    LocateUniform(uCacheScale[0]);
+    LocateUniform(uCacheScale[1]);
+    LocateUniform(uCacheOffset[0]);
+    LocateUniform(uCacheOffset[1]);
+}
+
+void _force_uniforms()
+{
+    SC_ForceUniform1i(uTex0, 0);
+    SC_ForceUniform1i(uTex1, 1);
+    SC_ForceUniform1i(uNoise, 2);
+    SC_ForceUniform4fv(uEnvColor, &gDP.envColor.r);
+    SC_ForceUniform4fv(uPrimColor, &gDP.primColor.r);
+    SC_ForceUniform1f(uPrimLODFrac, gDP.primColor.l);
+    SC_ForceUniform1f(uK4, gDP.convert.k4);
+    SC_ForceUniform1f(uK5, gDP.convert.k5);
+    SC_ForceUniform4fv(uFogColor, &gDP.fogColor.r);
+    SC_ForceUniform1i(uEnableFog, ((config.enableFog==1) && (gSP.geometryMode & G_FOG)));
+    SC_ForceUniform1f(uRenderState, OGL.renderState);
+    SC_ForceUniform1f(uFogMultiplier, (float) gSP.fog.multiplier / 255.0f);
+    SC_ForceUniform1f(uFogOffset, (float) gSP.fog.offset / 255.0f);
+    SC_ForceUniform1f(uAlphaRef, (gDP.otherMode.cvgXAlpha) ? 0.5 : gDP.blendColor.a);
+    SC_ForceUniform2f(uTexScale, gSP.texture.scales, gSP.texture.scalet);
+
+    if (gSP.textureTile[0]){
+        SC_ForceUniform2f(uTexOffset[0], gSP.textureTile[0]->fuls, gSP.textureTile[0]->fult);
+    } else {
+        SC_ForceUniform2f(uTexOffset[0], 0.0f, 0.0f);
+    }
+
+    if (gSP.textureTile[1])
+    {
+        SC_ForceUniform2f(uTexOffset[1], gSP.textureTile[1]->fuls, gSP.textureTile[1]->fult);
+    }
+    else
+    {
+        SC_ForceUniform2f(uTexOffset[1], 0.0f, 0.0f);
+    }
+
+    if (cache.current[0])
+    {
+        SC_ForceUniform2f(uCacheShiftScale[0], cache.current[0]->shiftScaleS, cache.current[0]->shiftScaleT);
+        SC_ForceUniform2f(uCacheScale[0], cache.current[0]->scaleS, cache.current[0]->scaleT);
+        SC_ForceUniform2f(uCacheOffset[0], cache.current[0]->offsetS, cache.current[0]->offsetT);
+    }
+    else
+    {
+        SC_ForceUniform2f(uCacheShiftScale[0], 1.0f, 1.0f);
+        SC_ForceUniform2f(uCacheScale[0], 1.0f, 1.0f);
+        SC_ForceUniform2f(uCacheOffset[0], 0.0f, 0.0f);
+    }
+
+    if (cache.current[1])
+    {
+        SC_ForceUniform2f(uCacheShiftScale[1], cache.current[1]->shiftScaleS, cache.current[1]->shiftScaleT);
+        SC_ForceUniform2f(uCacheScale[1], cache.current[1]->scaleS, cache.current[1]->scaleT);
+        SC_ForceUniform2f(uCacheOffset[1], cache.current[1]->offsetS, cache.current[1]->offsetT);
+    }
+    else
+    {
+        SC_ForceUniform2f(uCacheShiftScale[1], 1.0f, 1.0f);
+        SC_ForceUniform2f(uCacheScale[1], 1.0f, 1.0f);
+        SC_ForceUniform2f(uCacheOffset[1], 0.0f, 0.0f);
+    }
+}
+
+void _update_uniforms()
+{
+    SC_SetUniform4fv(uEnvColor, &gDP.envColor.r);
+    SC_SetUniform4fv(uPrimColor, &gDP.primColor.r);
+    SC_SetUniform1f(uPrimLODFrac, gDP.primColor.l);
+    SC_SetUniform4fv(uFogColor, &gDP.fogColor.r);
+    SC_SetUniform1i(uEnableFog, (config.enableFog && (gSP.geometryMode & G_FOG)));
+    SC_SetUniform1f(uRenderState, OGL.renderState);
+    SC_SetUniform1f(uFogMultiplier, (float) gSP.fog.multiplier / 255.0f);
+    SC_SetUniform1f(uFogOffset, (float) gSP.fog.offset / 255.0f);
+    SC_SetUniform1f(uAlphaRef, (gDP.otherMode.cvgXAlpha) ? 0.5 : gDP.blendColor.a);
+    SC_SetUniform1f(uK4, gDP.convert.k4);
+    SC_SetUniform1f(uK5, gDP.convert.k5);
+
+    //for some reason i must force these...
+    SC_ForceUniform2f(uTexScale, gSP.texture.scales, gSP.texture.scalet);
+    if (scProgramCurrent->usesT0)
+    {
+        if (gSP.textureTile[0])
+        {
+            SC_ForceUniform2f(uTexOffset[0], gSP.textureTile[0]->fuls, gSP.textureTile[0]->fult);
+        }
+        if (cache.current[0])
+        {
+            SC_ForceUniform2f(uCacheShiftScale[0], cache.current[0]->shiftScaleS, cache.current[0]->shiftScaleT);
+            SC_ForceUniform2f(uCacheScale[0], cache.current[0]->scaleS, cache.current[0]->scaleT);
+            SC_ForceUniform2f(uCacheOffset[0], cache.current[0]->offsetS, cache.current[0]->offsetT);
+        }
+    }
+
+    if (scProgramCurrent->usesT1)
+    {
+        if (gSP.textureTile[1])
+        {
+            SC_ForceUniform2f(uTexOffset[1], gSP.textureTile[1]->fuls, gSP.textureTile[1]->fult);
+        }
+        if (cache.current[1])
+        {
+            SC_ForceUniform2f(uCacheShiftScale[1], cache.current[1]->shiftScaleS, cache.current[1]->shiftScaleT);
+            SC_ForceUniform2f(uCacheScale[1], cache.current[1]->scaleS, cache.current[1]->scaleT);
+            SC_ForceUniform2f(uCacheOffset[1], cache.current[1]->offsetS, cache.current[1]->offsetT);
+        }
+    }
+};
+
+void ShaderCombiner_Init()
+{
+    //compile vertex shader:
+    GLint success;
+    const char *src[1];
+    char buff[4096];
+    char *str = buff;
+
+    str += sprintf(str, "%s", _vert);
+    if (config.enableFog)
+    {
+        str += sprintf(str, "%s", _vertfog);
+    }
+    if (config.zHack)
+    {
+        str += sprintf(str, "%s", _vertzhack);
+    }
+
+    str += sprintf(str, "}\n\n");
+
+#ifdef PRINT_SHADER
+    LOG(LOG_VERBOSE, "=============================================================\n");
+    LOG(LOG_VERBOSE, "Vertex Shader:\n");
+    LOG(LOG_VERBOSE, "=============================================================\n");
+    LOG(LOG_VERBOSE, "%s", buff);
+    LOG(LOG_VERBOSE, "=============================================================\n");
+#endif
+
+    src[0] = buff;
+    _vertex_shader = glCreateShader(GL_VERTEX_SHADER);
+    glShaderSource(_vertex_shader, 1, (const char**) src, NULL);
+    glCompileShader(_vertex_shader);
+    glGetShaderiv(_vertex_shader, GL_COMPILE_STATUS, &success);
+    if (!success)
+    {
+        _glcompiler_error(_vertex_shader);
+    }
+       
+       // prepare prog cache
+       for (int i=0; i<MAX_CACHE; i++) {
+               prog_cache[i]=NULL;
+               flag_cache[i]=0;
+               mux_cache[i]=0;
+               old_cache[i]=0;
+       }
+       cache_turn=0;
+};
+
+// Releases the program tree rooted at `prog`: children first, then the GL
+// program object and the node itself. scProgramCount is decremented once
+// per freed node. A NULL `prog` is a no-op.
+void ShaderCombiner_DeletePrograms(ShaderProgram *prog)
+{
+    if (!prog)
+        return;
+
+    ShaderCombiner_DeletePrograms(prog->left);
+    ShaderCombiner_DeletePrograms(prog->right);
+
+    // the fragment shader object is already deleted right after linking
+    glDeleteProgram(prog->program);
+    free(prog);
+    scProgramCount--;
+}
+
+// Tears down the combiner: frees every cached program, deletes the shared
+// vertex shader and resets the global program bookkeeping.
+void ShaderCombiner_Destroy()
+{
+    ShaderCombiner_DeletePrograms(scProgramRoot);
+    glDeleteShader(_vertex_shader);
+
+    scProgramChanged = 0;
+    scProgramCount = 0;
+    scProgramCurrent = NULL;
+    scProgramRoot = NULL;
+}
+
+// Selects (compiling it on first use) the shader program for a combine mux
+// and makes it current.
+//
+// mux    raw 64-bit combine mux.
+// flags  SC_* program flags, or -1 to derive them from the current
+//        config / gSP / gDP state.
+//
+// Sets scProgramChanged to 0 and returns early when the requested program is
+// already current; otherwise sets it to 1, binds the program with
+// glUseProgram and re-sends all of its uniforms.
+void ShaderCombiner_Set(u64 mux, int flags)
+{
+    //banjo tooie hack
+    if ((gDP.otherMode.cycleType == G_CYC_1CYCLE) && (mux == 0x00ffe7ffffcf9fcfLL))
+    {
+        mux = EncodeCombineMode( 0, 0, 0, 0, TEXEL0, 0, PRIMITIVE, 0,
+                                 0, 0, 0, 0, TEXEL0, 0, PRIMITIVE, 0 );
+    }
+
+    //determine flags
+    if (flags == -1)
+    {
+        flags = 0;
+        if ((config.enableFog) && (gSP.geometryMode & G_FOG))
+            flags |= SC_FOGENABLED;
+
+        if (config.enableAlphaTest)
+        {
+            if ((gDP.otherMode.alphaCompare == G_AC_THRESHOLD) && !(gDP.otherMode.alphaCvgSel)){
+                flags |= SC_ALPHAENABLED;
+                if (gDP.blendColor.a > 0.0f) flags |= SC_ALPHAGREATER;
+            } else if (gDP.otherMode.cvgXAlpha){
+                flags |= SC_ALPHAENABLED;
+                flags |= SC_ALPHAGREATER;
+            }
+        }
+
+        if (gDP.otherMode.cycleType == G_CYC_2CYCLE)
+            flags |= SC_2CYCLE;
+    }
+
+
+    //decode (and simplify) the mux; the second cycle is only decoded for SC_2CYCLE
+    DecodedMux dmux(mux, flags&SC_2CYCLE);
+    dmux.hack();
+
+    //if already bound:
+    if (scProgramCurrent)
+    {
+        if (_program_compare(scProgramCurrent, &dmux, flags))
+        {
+            scProgramChanged = 0;
+            return;
+        }
+    }
+
+    //traverse binary tree for cached programs
+    //the tree is ordered by raw mux only; _program_compare also returns
+    //non-zero for NULL, so the loop stops either on a match or on the empty
+    //child where a new node belongs, with `root` left pointing at its parent.
+    //Nodes with an equal mux but different flags end up in the left subtree.
+    scProgramChanged = 1;
+    ShaderProgram *root = scProgramRoot;
+    ShaderProgram *prog = root;
+    while(!_program_compare(prog, &dmux, flags))
+    {
+        root = prog;
+        if (prog->combine.mux < dmux.combine.mux)
+            prog = prog->right;
+        else
+            prog = prog->left;
+    }
+
+    //build new program
+    if (!prog)
+    {
+        scProgramCount++;
+        prog = ShaderCombiner_Compile(&dmux, flags);
+        //link the new node below its parent, using the same ordering as the walk above
+        if (!root)
+            scProgramRoot = prog;
+        else if (root->combine.mux < dmux.combine.mux)
+            root->right = prog;
+        else
+            root->left = prog;
+
+    }
+
+    //record when the program was last selected, then bind it
+    prog->lastUsed = OGL.frame_dl;
+    scProgramCurrent = prog;
+    glUseProgram(prog->program);
+    _force_uniforms();
+}
+
+ShaderProgram *ShaderCombiner_Compile(DecodedMux *dmux, int flags)
+{
+    GLint success;
+    char frag[4096];
+    char *buffer = frag;
+    ShaderProgram *prog = (ShaderProgram*) malloc(sizeof(ShaderProgram));
+
+    prog->left = prog->right = NULL;
+    prog->usesT0 = prog->usesT1 = prog->usesCol = prog->usesNoise = 0;
+    prog->combine = dmux->combine;
+    prog->flags = flags;
+    prog->vertex = _vertex_shader;
+
+    for(int i=0; i < ((flags & SC_2CYCLE) ? 4 : 2); i++)
+    {
+        //make sure were not ignoring cycle:
+        if ((dmux->flags&(1<<i)) == 0)
+        {
+            for(int j=0;j<4;j++)
+            {
+                prog->usesT0 |= (dmux->decode[i][j] == TEXEL0 || dmux->decode[i][j] == TEXEL0_ALPHA);
+                prog->usesT1 |= (dmux->decode[i][j] == TEXEL1 || dmux->decode[i][j] == TEXEL1_ALPHA);
+                prog->usesCol |= (dmux->decode[i][j] == SHADE || dmux->decode[i][j] == SHADE_ALPHA);
+                prog->usesNoise |= (dmux->decode[i][j] == NOISE);
+            }
+        }
+    }
+
+    buffer += sprintf(buffer, "%s", _frag_header);
+    if (prog->usesT0)
+        buffer += sprintf(buffer, "lowp vec4 lTex0 = texture2D(uTex0, vTexCoord0); \n");
+    if (prog->usesT1)
+        buffer += sprintf(buffer, "lowp vec4 lTex1 = texture2D(uTex1, vTexCoord1); \n");
+    if (prog->usesNoise)
+        buffer += sprintf(buffer, "lowp vec4 lNoise = texture2D(uNoise, (1.0 / 1024.0) * gl_FragCoord.st); \n");
+
+    for(int i = 0; i < ((flags & SC_2CYCLE) ? 2 : 1); i++)
+    {
+        if ((dmux->flags&(1<<(i*2))) == 0)
+        {
+            buffer += sprintf(buffer, "lFragColor.rgb = (%s - %s) * %s + %s; \n",
+                _color_param_str(dmux->decode[i*2][0]),
+                _color_param_str(dmux->decode[i*2][1]),
+                _color_param_str(dmux->decode[i*2][2]),
+                _color_param_str(dmux->decode[i*2][3])
+                );
+        }
+
+        if ((dmux->flags&(1<<(i*2+1))) == 0)
+        {
+            buffer += sprintf(buffer, "lFragColor.a = (%s - %s) * %s + %s; \n",
+                _alpha_param_str(dmux->decode[i*2+1][0]),
+                _alpha_param_str(dmux->decode[i*2+1][1]),
+                _alpha_param_str(dmux->decode[i*2+1][2]),
+                _alpha_param_str(dmux->decode[i*2+1][3])
+                );
+        }
+        buffer += sprintf(buffer, "gl_FragColor = lFragColor; \n");
+    };
+
+    //fog
+    if (flags&SC_FOGENABLED)
+    {
+        buffer += sprintf(buffer, "gl_FragColor = mix(gl_FragColor, uFogColor, vFactor); \n");
+    }
+
+    //alpha function
+    if (flags&SC_ALPHAENABLED)
+    {
+        if (flags&SC_ALPHAGREATER)
+            buffer += sprintf(buffer, "if (gl_FragColor.a < uAlphaRef) %s;\n", config.hackAlpha ? "gl_FragColor.a = 0" : "discard");
+        else
+            buffer += sprintf(buffer, "if (gl_FragColor.a <= uAlphaRef) %s;\n", config.hackAlpha ? "gl_FragColor.a = 0" : "discard");
+    }
+    buffer += sprintf(buffer, "} \n\n");
+    *buffer = 0;
+
+#ifdef PRINT_SHADER
+    LOG(LOG_VERBOSE, "=============================================================\n");
+    LOG(LOG_VERBOSE, "Combine=0x%llx flags=0x%x dmux flags=0x%x\n", prog->combine.mux, flags, dmux->flags);
+    LOG(LOG_VERBOSE, "Num=%i \t usesT0=%i usesT1=%i usesCol=%i usesNoise=%i\n", scProgramCount, prog->usesT0, prog->usesT1, prog->usesCol, prog->usesNoise);
+    LOG(LOG_VERBOSE, "=============================================================\n");
+    LOG(LOG_VERBOSE, "%s", frag);
+    LOG(LOG_VERBOSE, "=============================================================\n");
+#endif
+
+    prog->program = glCreateProgram();
+
+    //Compile:
+    char *src[1];
+    src[0] = frag;
+    GLint len[1];
+    len[0] = min(4096, strlen(frag));
+    prog->fragment = glCreateShader(GL_FRAGMENT_SHADER);
+
+    glShaderSource(prog->fragment, 1, (const char**) src, len);
+    glCompileShader(prog->fragment);
+
+
+    glGetShaderiv(prog->fragment, GL_COMPILE_STATUS, &success);
+    if (!success)
+    {
+        _glcompiler_error(prog->fragment);
+    }
+
+    //link
+    _locate_attributes(prog);
+    glAttachShader(prog->program, prog->fragment);
+    glAttachShader(prog->program, prog->vertex);
+    glLinkProgram(prog->program);
+    glGetProgramiv(prog->program, GL_LINK_STATUS, &success);
+    if (!success)
+    {
+        _gllinker_error(prog->program);
+    }
+
+    //remove fragment shader:
+    glDeleteShader(prog->fragment);
+
+    _locate_uniforms(prog);
+    return prog;
+}
+
diff --git a/source/gles2n64/src/ShaderCombiner.h b/source/gles2n64/src/ShaderCombiner.h
new file mode 100644 (file)
index 0000000..c036389
--- /dev/null
@@ -0,0 +1,258 @@
+
+#ifndef SHADERCOMBINER_H
+#define SHADERCOMBINER_H
+
+#define PRINT_SHADER
+#define UNIFORM_CHECK
+
+// Program flags: a bitmask stored in ShaderProgram::flags and used, together
+// with the raw mux, to identify a compiled program.
+#define SC_FOGENABLED           0x1
+#define SC_ALPHAENABLED         0x2
+#define SC_ALPHAGREATER         0x4
+#define SC_2CYCLE               0x8
+
+// Vertex attribute indices bound to aPosition / aColor / aTexCoord0 /
+// aTexCoord1 before a program is linked.
+#define SC_POSITION             1
+#define SC_COLOR                2
+#define SC_TEXCOORD0            3
+#define SC_TEXCOORD1            4
+
+#ifdef UNIFORM_CHECK
+#define SC_SetUniform1i(A, B) \
+    if (scProgramCurrent->uniforms.A.val != B) \
+        {scProgramCurrent->uniforms.A.val = B; glUniform1i(scProgramCurrent->uniforms.A.loc, B);}
+
+#define SC_SetUniform1f(A, B) \
+    if (scProgramCurrent->uniforms.A.val != B) \
+    {scProgramCurrent->uniforms.A.val = B; glUniform1f(scProgramCurrent->uniforms.A.loc, B);}
+
+#define SC_SetUniform4fv(A, B) \
+    if ((scProgramCurrent->uniforms.A.val[0] != (B)[0]) || (scProgramCurrent->uniforms.A.val[1] != (B)[1]) || \
+        (scProgramCurrent->uniforms.A.val[2] != (B)[2]) || (scProgramCurrent->uniforms.A.val[3] != (B)[3]))   \
+        {memcpy(scProgramCurrent->uniforms.A.val, B, 16); glUniform4fv(scProgramCurrent->uniforms.A.loc, 1, B);}
+
+#define SC_SetUniform2f(A, B, C) \
+    if ((scProgramCurrent->uniforms.A.val[0] != B) || (scProgramCurrent->uniforms.A.val[1] != C)) \
+        {scProgramCurrent->uniforms.A.val[0] = B; scProgramCurrent->uniforms.A.val[1] = C; glUniform2f(scProgramCurrent->uniforms.A.loc, B, C);}
+
+#define SC_ForceUniform1i(A, B) \
+        {scProgramCurrent->uniforms.A.val = B; glUniform1i(scProgramCurrent->uniforms.A.loc, B);}
+
+#define SC_ForceUniform1f(A, B) \
+        {scProgramCurrent->uniforms.A.val = B; glUniform1f(scProgramCurrent->uniforms.A.loc, B);}
+
+#define SC_ForceUniform4fv(A, B) \
+        {memcpy(scProgramCurrent->uniforms.A.val, B, 16); glUniform4fv(scProgramCurrent->uniforms.A.loc, 1, B);}
+
+#define SC_ForceUniform2f(A, B, C) \
+        {scProgramCurrent->uniforms.A.val[0] = B; scProgramCurrent->uniforms.A.val[1] = C; glUniform2f(scProgramCurrent->uniforms.A.loc, B, C);}
+
+#else
+#define SC_SetUniform1i(A, B)       glUniform1i(scProgramCurrent->uniforms.A.loc, B)
+#define SC_SetUniform1f(A, B)       glUniform1f(scProgramCurrent->uniforms.A.loc, B)
+#define SC_SetUniform4fv(A, B)      glUniform4fv(scProgramCurrent->uniforms.A.loc, 1, B)
+#define SC_SetUniform2f(A, B, C)    glUniform2f(scProgramCurrent->uniforms.A.loc, B, C)
+#define SC_ForceUniform1i(A, B)     glUniform1i(scProgramCurrent->uniforms.A.loc, B)
+#define SC_ForceUniform1f(A, B)     glUniform1f(scProgramCurrent->uniforms.A.loc, B)
+#define SC_ForceUniform4fv(A, B)    glUniform4fv(scProgramCurrent->uniforms.A.loc, 1, B)
+#define SC_ForceUniform2f(A, B, C)  glUniform2f(scProgramCurrent->uniforms.A.loc, B, C)
+#endif
+
+/* Color combiner constants: */
+#define G_CCMUX_COMBINED        0
+#define G_CCMUX_TEXEL0          1
+#define G_CCMUX_TEXEL1          2
+#define G_CCMUX_PRIMITIVE       3
+#define G_CCMUX_SHADE           4
+#define G_CCMUX_ENVIRONMENT     5
+#define G_CCMUX_CENTER          6
+#define G_CCMUX_SCALE           6
+#define G_CCMUX_COMBINED_ALPHA  7
+#define G_CCMUX_TEXEL0_ALPHA    8
+#define G_CCMUX_TEXEL1_ALPHA    9
+#define G_CCMUX_PRIMITIVE_ALPHA 10
+#define G_CCMUX_SHADE_ALPHA     11
+#define G_CCMUX_ENV_ALPHA       12
+#define G_CCMUX_LOD_FRACTION    13
+#define G_CCMUX_PRIM_LOD_FRAC   14
+#define G_CCMUX_NOISE           7
+#define G_CCMUX_K4              7
+#define G_CCMUX_K5              15
+#define G_CCMUX_1               6
+#define G_CCMUX_0               31
+
+/* Alpha combiner constants: */
+#define G_ACMUX_COMBINED        0
+#define G_ACMUX_TEXEL0          1
+#define G_ACMUX_TEXEL1          2
+#define G_ACMUX_PRIMITIVE       3
+#define G_ACMUX_SHADE           4
+#define G_ACMUX_ENVIRONMENT     5
+#define G_ACMUX_LOD_FRACTION    0
+#define G_ACMUX_PRIM_LOD_FRAC   6
+#define G_ACMUX_1               6
+#define G_ACMUX_0               7
+
+#define EncodeCombineMode( a0, b0, c0, d0, Aa0, Ab0, Ac0, Ad0,  \
+        a1, b1, c1, d1, Aa1, Ab1, Ac1, Ad1 ) \
+        (u64)(((u64)(_SHIFTL( G_CCMUX_##a0, 20, 4 ) | _SHIFTL( G_CCMUX_##c0, 15, 5 ) | \
+        _SHIFTL( G_ACMUX_##Aa0, 12, 3 ) | _SHIFTL( G_ACMUX_##Ac0, 9, 3 ) | \
+        _SHIFTL( G_CCMUX_##a1, 5, 4 ) | _SHIFTL( G_CCMUX_##c1, 0, 5 )) << 32) | \
+        (u64)(_SHIFTL( G_CCMUX_##b0, 28, 4 ) | _SHIFTL( G_CCMUX_##d0, 15, 3 ) | \
+        _SHIFTL( G_ACMUX_##Ab0, 12, 3 ) | _SHIFTL( G_ACMUX_##Ad0, 9, 3 ) | \
+        _SHIFTL( G_CCMUX_##b1, 24, 4 ) | _SHIFTL( G_ACMUX_##Aa1, 21, 3 ) | \
+        _SHIFTL( G_ACMUX_##Ac1, 18, 3 ) | _SHIFTL( G_CCMUX_##d1, 6, 3 ) | \
+        _SHIFTL( G_ACMUX_##Ab1, 3, 3 ) | _SHIFTL( G_ACMUX_##Ad1, 0, 3 )))
+
+#define G_CC_PRIMITIVE              0, 0, 0, PRIMITIVE, 0, 0, 0, PRIMITIVE
+#define G_CC_SHADE                  0, 0, 0, SHADE, 0, 0, 0, SHADE
+#define G_CC_MODULATEI              TEXEL0, 0, SHADE, 0, 0, 0, 0, SHADE
+#define G_CC_MODULATEIA             TEXEL0, 0, SHADE, 0, TEXEL0, 0, SHADE, 0
+#define G_CC_MODULATEIDECALA        TEXEL0, 0, SHADE, 0, 0, 0, 0, TEXEL0
+#define G_CC_MODULATERGB            G_CC_MODULATEI
+#define G_CC_MODULATERGBA           G_CC_MODULATEIA
+#define G_CC_MODULATERGBDECALA      G_CC_MODULATEIDECALA
+#define G_CC_MODULATEI_PRIM         TEXEL0, 0, PRIMITIVE, 0, 0, 0, 0, PRIMITIVE
+#define G_CC_MODULATEIA_PRIM        TEXEL0, 0, PRIMITIVE, 0, TEXEL0, 0, PRIMITIVE, 0
+#define G_CC_MODULATEIDECALA_PRIM   TEXEL0, 0, PRIMITIVE, 0, 0, 0, 0, TEXEL0
+#define G_CC_MODULATERGB_PRIM       G_CC_MODULATEI_PRIM
+#define G_CC_MODULATERGBA_PRIM      G_CC_MODULATEIA_PRIM
+#define G_CC_MODULATERGBDECALA_PRIM G_CC_MODULATEIDECALA_PRIM
+#define G_CC_DECALRGB               0, 0, 0, TEXEL0, 0, 0, 0, SHADE
+#define G_CC_DECALRGBA              0, 0, 0, TEXEL0, 0, 0, 0, TEXEL0
+#define G_CC_BLENDI                 ENVIRONMENT, SHADE, TEXEL0, SHADE, 0, 0, 0, SHADE
+#define G_CC_BLENDIA                ENVIRONMENT, SHADE, TEXEL0, SHADE, TEXEL0, 0, SHADE, 0
+#define G_CC_BLENDIDECALA           ENVIRONMENT, SHADE, TEXEL0, SHADE, 0, 0, 0, TEXEL0
+#define G_CC_BLENDRGBA              TEXEL0, SHADE, TEXEL0_ALPHA, SHADE, 0, 0, 0, SHADE
+#define G_CC_BLENDRGBDECALA         TEXEL0, SHADE, TEXEL0_ALPHA, SHADE, 0, 0, 0, TEXEL0
+#define G_CC_ADDRGB                 1, 0, TEXEL0, SHADE, 0, 0, 0, SHADE
+#define G_CC_ADDRGBDECALA           1, 0, TEXEL0, SHADE, 0, 0, 0, TEXEL0
+#define G_CC_REFLECTRGB             ENVIRONMENT, 0, TEXEL0, SHADE, 0, 0, 0, SHADE
+#define G_CC_REFLECTRGBDECALA       ENVIRONMENT, 0, TEXEL0, SHADE, 0, 0, 0, TEXEL0
+#define G_CC_HILITERGB              PRIMITIVE, SHADE, TEXEL0, SHADE, 0, 0, 0, SHADE
+#define G_CC_HILITERGBA             PRIMITIVE, SHADE, TEXEL0, SHADE, PRIMITIVE, SHADE, TEXEL0, SHADE
+#define G_CC_HILITERGBDECALA        PRIMITIVE, SHADE, TEXEL0, SHADE, 0, 0, 0, TEXEL0
+#define G_CC_SHADEDECALA            0, 0, 0, SHADE, 0, 0, 0, TEXEL0
+#define G_CC_BLENDPE                PRIMITIVE, ENVIRONMENT, TEXEL0, ENVIRONMENT, TEXEL0, 0, SHADE, 0
+#define G_CC_BLENDPEDECALA          PRIMITIVE, ENVIRONMENT, TEXEL0, ENVIRONMENT, 0, 0, 0, TEXEL0
+#define _G_CC_BLENDPE               ENVIRONMENT, PRIMITIVE, TEXEL0, PRIMITIVE, TEXEL0, 0, SHADE, 0
+#define _G_CC_BLENDPEDECALA         ENVIRONMENT, PRIMITIVE, TEXEL0, PRIMITIVE, 0, 0, 0, TEXEL0
+#define _G_CC_TWOCOLORTEX           PRIMITIVE, SHADE, TEXEL0, SHADE, 0, 0, 0, SHADE
+#define _G_CC_SPARSEST              PRIMITIVE, TEXEL0, LOD_FRACTION, TEXEL0, PRIMITIVE, TEXEL0, LOD_FRACTION, TEXEL0
+#define G_CC_TEMPLERP               TEXEL1, TEXEL0, PRIM_LOD_FRAC, TEXEL0, TEXEL1, TEXEL0, PRIM_LOD_FRAC, TEXEL0
+#define G_CC_TRILERP                TEXEL1, TEXEL0, LOD_FRACTION, TEXEL0, TEXEL1, TEXEL0, LOD_FRACTION, TEXEL0
+#define G_CC_INTERFERENCE           TEXEL0, 0, TEXEL1, 0, TEXEL0, 0, TEXEL1, 0
+#define G_CC_1CYUV2RGB              TEXEL0, K4, K5, TEXEL0, 0, 0, 0, SHADE
+#define G_CC_YUV2RGB                TEXEL1, K4, K5, TEXEL1, 0, 0, 0, 0
+#define G_CC_PASS2                  0, 0, 0, COMBINED, 0, 0, 0, COMBINED
+#define G_CC_MODULATEI2             COMBINED, 0, SHADE, 0, 0, 0, 0, SHADE
+#define G_CC_MODULATEIA2            COMBINED, 0, SHADE, 0, COMBINED, 0, SHADE, 0
+#define G_CC_MODULATERGB2           G_CC_MODULATEI2
+#define G_CC_MODULATERGBA2          G_CC_MODULATEIA2
+#define G_CC_MODULATEI_PRIM2        COMBINED, 0, PRIMITIVE, 0, 0, 0, 0, PRIMITIVE
+#define G_CC_MODULATEIA_PRIM2       COMBINED, 0, PRIMITIVE, 0, COMBINED, 0, PRIMITIVE, 0
+#define G_CC_MODULATERGB_PRIM2      G_CC_MODULATEI_PRIM2
+#define G_CC_MODULATERGBA_PRIM2     G_CC_MODULATEIA_PRIM2
+#define G_CC_DECALRGB2              0, 0, 0, COMBINED, 0, 0, 0, SHADE
+#define G_CC_BLENDI2                ENVIRONMENT, SHADE, COMBINED, SHADE, 0, 0, 0, SHADE
+#define G_CC_BLENDIA2               ENVIRONMENT, SHADE, COMBINED, SHADE, COMBINED, 0, SHADE, 0
+#define G_CC_CHROMA_KEY2            TEXEL0, CENTER, SCALE, 0, 0, 0, 0, 0
+#define G_CC_HILITERGB2             ENVIRONMENT, COMBINED, TEXEL0, COMBINED, 0, 0, 0, SHADE
+#define G_CC_HILITERGBA2            ENVIRONMENT, COMBINED, TEXEL0, COMBINED, ENVIRONMENT, COMBINED, TEXEL0, COMBINED
+#define G_CC_HILITERGBDECALA2       ENVIRONMENT, COMBINED, TEXEL0, COMBINED, 0, 0, 0, TEXEL0
+#define G_CC_HILITERGBPASSA2        ENVIRONMENT, COMBINED, TEXEL0, COMBINED, 0, 0, 0, COMBINED
+
+// Internal generalized combiner inputs
+#define COMBINED        0
+#define TEXEL0          1
+#define TEXEL1          2
+#define PRIMITIVE       3
+#define SHADE           4
+#define ENVIRONMENT     5
+#define CENTER          6
+#define SCALE           7
+#define COMBINED_ALPHA  8
+#define TEXEL0_ALPHA    9
+#define TEXEL1_ALPHA    10
+#define PRIMITIVE_ALPHA 11
+#define SHADE_ALPHA     12
+#define ENV_ALPHA       13
+#define LOD_FRACTION    14
+#define PRIM_LOD_FRAC   15
+#define NOISE           16
+#define K4              17
+#define K5              18
+#define ONE             19
+#define ZERO            20
+#define UNKNOWN         21
+
+
+// Per-program uniform table. Every entry pairs the uniform's GL location
+// (`loc`) with `val`, the copy of the value that the SC_*Uniform* macros
+// store when they upload it. Entries are grouped by value type.
+struct UniformLocation
+{
+    // integer uniforms (texture samplers, fog switch)
+    struct {GLint loc; int val;} uTex0, uTex1, uNoise;
+    struct {GLint loc; int val;} uEnableFog;
+    // single float uniforms
+    struct {GLint loc; float val;} uFogMultiplier, uFogOffset, uAlphaRef, uPrimLODFrac, uRenderState, uK4, uK5;
+    // four-component colours
+    struct {GLint loc; float val[4];} uEnvColor, uPrimColor, uFogColor;
+    // two-component values; the [2] arrays hold one entry per texture unit
+    struct {GLint loc; float val[2];}  uTexScale, uTexOffset[2], uCacheShiftScale[2],
+        uCacheScale[2], uCacheOffset[2];
+};
+
+// One compiled combiner program; also a node of the binary tree of cached
+// programs rooted at scProgramRoot.
+struct ShaderProgram
+{
+    GLint       program;      //GL program object
+    GLint       fragment;     //fragment shader object (deleted again once the program is linked)
+    GLint       vertex;       //vertex shader object shared by all programs
+    int         usesT0;       //uses texcoord0 attrib
+    int         usesT1;       //uses texcoord1 attrib
+    int         usesCol;      //uses color attrib
+    int         usesNoise;    //requires noise texture
+
+    UniformLocation uniforms; //uniform locations and last uploaded values
+    gDPCombine      combine;  //combine mux the program was generated for
+    u32             flags;    //SC_* program flags the program was generated for
+    ShaderProgram   *left, *right; //tree children, ordered by combine.mux
+    u32             lastUsed; //OGL.frame_dl value recorded when the program was last selected
+};
+
+
+//dmux flags:
+//bit n set means row n of DecodedMux::decode does not have to be emitted
+#define SC_IGNORE_RGB0      (1<<0)
+#define SC_IGNORE_ALPHA0    (1<<1)
+#define SC_IGNORE_RGB1      (1<<2)
+#define SC_IGNORE_ALPHA1    (1<<3)
+
+// A combine mux expanded into a table of generalized combiner inputs.
+class DecodedMux
+{
+    public:
+        // Decodes and simplifies `mux`; the second cycle is only decoded
+        // when `cycle2` is true.
+        DecodedMux(u64 mux, bool cycle2);
+
+        // Applies the mux-specific workarounds enabled in the config.
+        void hack();
+        // True when row `index` of decode contains `src`.
+        bool find(int index, int src);
+        // Exchanges src0/src1 in the rows of `cycle` (0: rows 0-1, else rows 2-3).
+        bool swap(int cycle, int src0, int src1);
+        // Replaces src by dest in the rows of `cycle` (0: rows 0-1, else rows 2-3).
+        bool replace(int cycle, int src, int dest);
+
+        gDPCombine combine;     // raw mux
+        // decode[row][col]: rows are cycle0 rgb, cycle0 alpha, cycle1 rgb,
+        // cycle1 alpha; columns are the four inputs of (c0 - c1) * c2 + c3
+        int decode[4][4];
+        int flags;              // SC_IGNORE_* bits
+};
+
+extern int CCEncodeA[];
+extern int CCEncodeB[];
+extern int CCEncodeC[];
+extern int CCEncodeD[];
+extern int ACEncodeA[];
+extern int ACEncodeB[];
+extern int ACEncodeC[];
+extern int ACEncodeD[];
+
+extern ShaderProgram    *scProgramRoot;
+extern ShaderProgram    *scProgramCurrent;
+extern int              scProgramChanged;
+extern int              scProgramCount;
+
+// Shader combiner entry points.
+extern void ShaderCombiner_Init();
+extern void ShaderCombiner_Destroy();
+extern void ShaderCombiner_DeleteProgram(ShaderProgram *prog);
+// Recursively frees a program subtree. This is the spelling defined in
+// ShaderCombiner.cpp; the singular declaration above is kept so existing
+// references still compile.
+extern void ShaderCombiner_DeletePrograms(ShaderProgram *prog);
+// flags == -1 derives the program flags from the current emulator state.
+extern void ShaderCombiner_Set(u64 mux, int flags=-1);
+extern ShaderProgram *ShaderCombiner_Compile(DecodedMux *dmux, int flags);
+
+#endif
+
diff --git a/source/gles2n64/src/Textures.cpp b/source/gles2n64/src/Textures.cpp
new file mode 100644 (file)
index 0000000..d39f32d
--- /dev/null
@@ -0,0 +1,1334 @@
+#include <time.h>
+#include <stdlib.h>
+#include <memory.h>
+
+// Fallback min() for toolchains that do not provide one as a macro.
+// Function-like macro: each argument may be evaluated twice, so do not pass
+// expressions with side effects.
+#ifndef min
+#define min(a,b) ((a) < (b) ? (a) : (b))
+#endif
+
+#include "Common.h"
+#include "Config.h"
+#include "OpenGL.h"
+#include "Textures.h"
+#include "GBI.h"
+#include "RSP.h"
+#include "gDP.h"
+#include "gSP.h"
+#include "N64.h"
+#include "CRC.h"
+#include "convert.h"
+#include "2xSAI.h"
+//#include "FrameBuffer.h"
+
+// Output pixel layouts produced by the texel converters below.
+// FORMAT_NONE marks a size/format pair that has no converter.
+#define FORMAT_NONE     0
+#define FORMAT_I8       1
+#define FORMAT_IA88     2
+#define FORMAT_RGBA4444 3
+#define FORMAT_RGBA5551 4
+#define FORMAT_RGBA8888 5
+
+// Uncomment to printf the chosen conversion for every texture load.
+//#define PRINT_TEXTUREFORMAT
+
+// The single global texture cache instance.
+TextureCache    cache;
+
+// Texel fetch/convert callback.
+//   src     - start of the source row
+//   x       - texel column within the row
+//   i       - value XORed into the element index (callers in this file pass
+//             (ty & 1) << 1 for TMEM rows and 0 for background images)
+//   palette - palette number, used only by the 4-bit colour-index fetchers
+typedef u32 (*GetTexelFunc)( void *src, u16 x, u16 i, u8 palette );
+
+// Placeholder fetcher for unsupported formats: ignores every argument and
+// always yields a zero texel.
+u32 GetNone( void *src, u16 x, u16 i, u8 palette )
+{
+    const u32 emptyTexel = 0x00000000;
+    return emptyTexel;
+}
+
+u32 GetCI4IA_RGBA4444( void *src, u16 x, u16 i, u8 palette )
+{
+    u8 color4B = ((u8*)src)[(x>>1)^(i<<1)];
+    if (x & 1)
+        return IA88_RGBA4444( *(u16*)&TMEM[256 + (palette << 4) + (color4B & 0x0F)] );
+    else
+        return IA88_RGBA4444( *(u16*)&TMEM[256 + (palette << 4) + (color4B >> 4)] );
+}
+
+u32 GetCI4IA_RGBA8888( void *src, u16 x, u16 i, u8 palette )
+{
+    u8 color4B = ((u8*)src)[(x>>1)^(i<<1)];
+    if (x & 1)
+        return IA88_RGBA8888( *(u16*)&TMEM[256 + (palette << 4) + (color4B & 0x0F)] );
+    else
+        return IA88_RGBA8888( *(u16*)&TMEM[256 + (palette << 4) + (color4B >> 4)] );
+}
+
+u32 GetCI4RGBA_RGBA5551( void *src, u16 x, u16 i, u8 palette )
+{
+    u8 color4B = ((u8*)src)[(x>>1)^(i<<1)];
+    if (x & 1)
+        return RGBA5551_RGBA5551( *(u16*)&TMEM[256 + (palette << 4) + (color4B & 0x0F)] );
+    else
+        return RGBA5551_RGBA5551( *(u16*)&TMEM[256 + (palette << 4) + (color4B >> 4)] );
+}
+
+u32 GetCI4RGBA_RGBA8888( void *src, u16 x, u16 i, u8 palette )
+{
+    u8 color4B = ((u8*)src)[(x>>1)^(i<<1)];
+    if (x & 1)
+        return RGBA5551_RGBA8888( *(u16*)&TMEM[256 + (palette << 4) + (color4B & 0x0F)] );
+    else
+        return RGBA5551_RGBA8888( *(u16*)&TMEM[256 + (palette << 4) + (color4B >> 4)] );
+}
+
+// 4-bit IA texel -> RGBA8888. Two texels share a byte: even x is the high
+// nibble, odd x the low nibble.
+u32 GetIA31_RGBA8888( void *src, u16 x, u16 i, u8 palette )
+{
+    const u8 packed = ((u8*)src)[(x>>1)^(i<<1)];
+    if (x & 1)
+        return IA31_RGBA8888( packed & 0x0F );
+    return IA31_RGBA8888( packed >> 4 );
+}
+
+// 4-bit IA texel -> RGBA4444. Two texels share a byte: even x is the high
+// nibble, odd x the low nibble.
+u32 GetIA31_RGBA4444( void *src, u16 x, u16 i, u8 palette )
+{
+    const u8 packed = ((u8*)src)[(x>>1)^(i<<1)];
+    if (x & 1)
+        return IA31_RGBA4444( packed & 0x0F );
+    return IA31_RGBA4444( packed >> 4 );
+}
+
+// 4-bit IA texel -> IA88. Two texels share a byte: even x is the high
+// nibble, odd x the low nibble.
+u32 GetIA31_IA88( void *src, u16 x, u16 i, u8 palette )
+{
+    const u8 packed = ((u8*)src)[(x>>1)^(i<<1)];
+    if (x & 1)
+        return IA31_IA88( packed & 0x0F );
+    return IA31_IA88( packed >> 4 );
+}
+
+// 4-bit intensity texel -> RGBA8888. Two texels share a byte: even x is the
+// high nibble, odd x the low nibble.
+u32 GetI4_RGBA8888( void *src, u16 x, u16 i, u8 palette )
+{
+    const u8 packed = ((u8*)src)[(x>>1)^(i<<1)];
+    if (x & 1)
+        return I4_RGBA8888( packed & 0x0F );
+    return I4_RGBA8888( packed >> 4 );
+}
+
+// 4-bit intensity texel -> RGBA4444. Two texels share a byte: even x is the
+// high nibble, odd x the low nibble.
+u32 GetI4_RGBA4444( void *src, u16 x, u16 i, u8 palette )
+{
+    const u8 packed = ((u8*)src)[(x>>1)^(i<<1)];
+    if (x & 1)
+        return I4_RGBA4444( packed & 0x0F );
+    return I4_RGBA4444( packed >> 4 );
+}
+
+// 4-bit intensity texel -> I8. Two texels share a byte: even x is the high
+// nibble, odd x the low nibble.
+u32 GetI4_I8( void *src, u16 x, u16 i, u8 palette )
+{
+    const u8 packed = ((u8*)src)[(x>>1)^(i<<1)];
+    if (x & 1)
+        return I4_I8( packed & 0x0F );
+    return I4_I8( packed >> 4 );
+}
+
+
+// 4-bit intensity texel -> IA88. Two texels share a byte: even x is the high
+// nibble, odd x the low nibble.
+u32 GetI4_IA88( void *src, u16 x, u16 i, u8 palette )
+{
+    const u8 packed = ((u8*)src)[(x>>1)^(i<<1)];
+    if (x & 1)
+        return I4_IA88( packed & 0x0F );
+    return I4_IA88( packed >> 4 );
+}
+
+// 8-bit colour index -> IA88 palette entry -> RGBA4444.
+// The palette argument is not used; the index addresses TMEM from element 256.
+u32 GetCI8IA_RGBA4444( void *src, u16 x, u16 i, u8 palette )
+{
+    const u8 index = ((u8*)src)[x^(i<<1)];
+    return IA88_RGBA4444( *(u16*)&TMEM[256 + index] );
+}
+
+// 8-bit colour index -> IA88 palette entry -> RGBA8888.
+// The palette argument is not used; the index addresses TMEM from element 256.
+u32 GetCI8IA_RGBA8888( void *src, u16 x, u16 i, u8 palette )
+{
+    const u8 index = ((u8*)src)[x^(i<<1)];
+    return IA88_RGBA8888( *(u16*)&TMEM[256 + index] );
+}
+
+// 8-bit colour index -> RGBA5551 palette entry, passed through the
+// RGBA5551_RGBA5551 converter.
+// The palette argument is not used; the index addresses TMEM from element 256.
+u32 GetCI8RGBA_RGBA5551( void *src, u16 x, u16 i, u8 palette )
+{
+    const u8 index = ((u8*)src)[x^(i<<1)];
+    return RGBA5551_RGBA5551( *(u16*)&TMEM[256 + index] );
+}
+
+// 8-bit colour index -> RGBA5551 palette entry -> RGBA8888.
+// The palette argument is not used; the index addresses TMEM from element 256.
+u32 GetCI8RGBA_RGBA8888( void *src, u16 x, u16 i, u8 palette )
+{
+    const u8 index = ((u8*)src)[x^(i<<1)];
+    return RGBA5551_RGBA8888( *(u16*)&TMEM[256 + index] );
+}
+
+// 8-bit IA texel (one byte per texel) -> RGBA8888.
+u32 GetIA44_RGBA8888( void *src, u16 x, u16 i, u8 palette )
+{
+    const u8 texel = ((u8*)src)[x^(i<<1)];
+    return IA44_RGBA8888(texel);
+}
+
+// 8-bit IA texel (one byte per texel) -> RGBA4444.
+u32 GetIA44_RGBA4444( void *src, u16 x, u16 i, u8 palette )
+{
+    const u8 texel = ((u8*)src)[x^(i<<1)];
+    return IA44_RGBA4444(texel);
+}
+
+// 8-bit IA texel (one byte per texel) -> IA88.
+u32 GetIA44_IA88( void *src, u16 x, u16 i, u8 palette )
+{
+    const u8 texel = ((u8*)src)[x^(i<<1)];
+    return IA44_IA88(texel);
+}
+
+// 8-bit intensity texel -> RGBA8888.
+u32 GetI8_RGBA8888( void *src, u16 x, u16 i, u8 palette )
+{
+    const u8 texel = ((u8*)src)[x^(i<<1)];
+    return I8_RGBA8888(texel);
+}
+
+// 8-bit intensity texel returned unchanged.
+u32 GetI8_I8( void *src, u16 x, u16 i, u8 palette )
+{
+    const u8 *bytes = (const u8*)src;
+    return bytes[x ^ (i << 1)];
+}
+
+// 8-bit intensity texel -> IA88.
+u32 GetI8_IA88( void *src, u16 x, u16 i, u8 palette )
+{
+    const u8 texel = ((u8*)src)[x^(i<<1)];
+    return I8_IA88(texel);
+}
+
+// 8-bit intensity texel -> RGBA4444.
+u32 GetI8_RGBA4444( void *src, u16 x, u16 i, u8 palette )
+{
+    const u8 texel = ((u8*)src)[x^(i<<1)];
+    return I8_RGBA4444(texel);
+}
+
+// 16-bit RGBA5551 texel -> RGBA8888. Index is x XOR i in 16-bit units.
+u32 GetRGBA5551_RGBA8888( void *src, u16 x, u16 i, u8 palette )
+{
+    const u16 texel = ((u16*)src)[x^i];
+    return RGBA5551_RGBA8888( texel );
+}
+
+// 16-bit RGBA5551 texel passed through the RGBA5551_RGBA5551 converter.
+// Index is x XOR i in 16-bit units.
+u32 GetRGBA5551_RGBA5551( void *src, u16 x, u16 i, u8 palette )
+{
+    const u16 texel = ((u16*)src)[x^i];
+    return RGBA5551_RGBA5551( texel );
+}
+
+// 16-bit IA88 texel -> RGBA8888. Index is x XOR i in 16-bit units.
+u32 GetIA88_RGBA8888( void *src, u16 x, u16 i, u8 palette )
+{
+    const u16 texel = ((u16*)src)[x^i];
+    return IA88_RGBA8888(texel);
+}
+
+// 16-bit IA88 texel -> RGBA4444. Index is x XOR i in 16-bit units.
+u32 GetIA88_RGBA4444( void *src, u16 x, u16 i, u8 palette )
+{
+    const u16 texel = ((u16*)src)[x^i];
+    return IA88_RGBA4444(texel);
+}
+
+// 16-bit IA88 texel passed through the IA88_IA88 converter.
+// Index is x XOR i in 16-bit units.
+u32 GetIA88_IA88( void *src, u16 x, u16 i, u8 palette )
+{
+    const u16 texel = ((u16*)src)[x^i];
+    return IA88_IA88(texel);
+}
+
+// 32-bit texel returned unchanged. Index is x XOR i in 32-bit units.
+u32 GetRGBA8888_RGBA8888( void *src, u16 x, u16 i, u8 palette )
+{
+    const u32 *words = (const u32*)src;
+    return words[x ^ i];
+}
+
+// 32-bit RGBA8888 texel -> RGBA4444. Index is x XOR i in 32-bit units.
+u32 GetRGBA8888_RGBA4444( void *src, u16 x, u16 i, u8 palette )
+{
+    const u32 texel = ((u32*)src)[x^i];
+    return RGBA8888_RGBA4444(texel);
+}
+
+
+// One row of the conversion tables below: how to turn a given N64
+// size/format pair into a GL-uploadable pixel layout.
+struct TextureFormat
+{
+    int format;                 // one of the FORMAT_* output layouts
+    GetTexelFunc getTexel;      // fetch+convert routine for a single texel
+    int lineShift, maxTexels;   // NOTE(review): not read in the visible part of this file -- see TextureCache_Update
+};
+
+
+// Conversion table selected when config.texture.useIA is set.
+// Indexed as size*6 + format (4 sizes x 6 format slots); intensity and IA
+// sources are kept as two-channel IA88.
+TextureFormat textureFormatIA[4*6] =
+{
+    // 4-bit
+    {   FORMAT_RGBA5551,    GetCI4RGBA_RGBA5551,    4,  4096 }, // RGBA (SELECT)
+    {   FORMAT_NONE,        GetNone,                4,  8192 }, // YUV
+    {   FORMAT_RGBA5551,    GetCI4RGBA_RGBA5551,    4,  4096 }, // CI
+    {   FORMAT_IA88,        GetIA31_IA88,           4,  8192 }, // IA
+    {   FORMAT_IA88,        GetI4_IA88,             4,  8192 }, // I
+    {   FORMAT_RGBA8888,    GetCI4IA_RGBA8888,      4,  4096 }, // IA Palette
+    // 8-bit
+    {   FORMAT_RGBA5551,    GetCI8RGBA_RGBA5551,    3,  2048 }, // RGBA (SELECT)
+    {   FORMAT_NONE,        GetNone,                3,  4096 }, // YUV
+    {   FORMAT_RGBA5551,    GetCI8RGBA_RGBA5551,    3,  2048 }, // CI
+    {   FORMAT_IA88,        GetIA44_IA88,           3,  4096 }, // IA
+    {   FORMAT_IA88,        GetI8_IA88,             3,  4096 }, // I
+    {   FORMAT_RGBA8888,    GetCI8IA_RGBA8888,      3,  2048 }, // IA Palette
+    // 16-bit
+    {   FORMAT_RGBA5551,    GetRGBA5551_RGBA5551,   2,  2048 }, // RGBA
+    {   FORMAT_NONE,        GetNone,                2,  2048 }, // YUV
+    {   FORMAT_NONE,        GetNone,                2,  2048 }, // CI
+    {   FORMAT_IA88,        GetIA88_IA88,           2,  2048 }, // IA
+    {   FORMAT_NONE,        GetNone,                2,  2048 }, // I
+    {   FORMAT_NONE,        GetNone,                2,  2048 }, // IA Palette
+    // 32-bit
+    {   FORMAT_RGBA8888,    GetRGBA8888_RGBA8888,   2,  1024 }, // RGBA
+    {   FORMAT_NONE,        GetNone,                2,  1024 }, // YUV
+    {   FORMAT_NONE,        GetNone,                2,  1024 }, // CI
+    {   FORMAT_NONE,        GetNone,                2,  1024 }, // IA
+    {   FORMAT_NONE,        GetNone,                2,  1024 }, // I
+    {   FORMAT_NONE,        GetNone,                2,  1024 }, // IA Palette
+};
+
+// Conversion table selected when config.texture.useIA is not set.
+// Same size*6 + format layout as textureFormatIA, but intensity and IA
+// sources are expanded to RGBA4444 or RGBA8888 instead of IA88.
+TextureFormat textureFormatRGBA[4*6] =
+{
+    // 4-bit
+    {   FORMAT_RGBA5551,    GetCI4RGBA_RGBA5551,    4,  4096 }, // RGBA (SELECT)
+    {   FORMAT_NONE,        GetNone,                4,  8192 }, // YUV
+    {   FORMAT_RGBA5551,    GetCI4RGBA_RGBA5551,    4,  4096 }, // CI
+    {   FORMAT_RGBA4444,    GetIA31_RGBA4444,       4,  8192 }, // IA
+    {   FORMAT_RGBA4444,    GetI4_RGBA4444,         4,  8192 }, // I
+    {   FORMAT_RGBA8888,    GetCI4IA_RGBA8888,      4,  4096 }, // IA Palette
+    // 8-bit
+    {   FORMAT_RGBA5551,    GetCI8RGBA_RGBA5551,    3,  2048 }, // RGBA (SELECT)
+    {   FORMAT_NONE,        GetNone,                3,  4096 }, // YUV
+    {   FORMAT_RGBA5551,    GetCI8RGBA_RGBA5551,    3,  2048 }, // CI
+    {   FORMAT_RGBA4444,    GetIA44_RGBA4444,       3,  4096 }, // IA
+    {   FORMAT_RGBA8888,    GetI8_RGBA8888,         3,  4096 }, // I
+    {   FORMAT_RGBA8888,    GetCI8IA_RGBA8888,      3,  2048 }, // IA Palette
+    // 16-bit
+    {   FORMAT_RGBA5551,    GetRGBA5551_RGBA5551,   2,  2048 }, // RGBA
+    {   FORMAT_NONE,        GetNone,                2,  2048 }, // YUV
+    {   FORMAT_NONE,        GetNone,                2,  2048 }, // CI
+    {   FORMAT_RGBA8888,    GetIA88_RGBA8888,       2,  2048 }, // IA
+    {   FORMAT_NONE,        GetNone,                2,  2048 }, // I
+    {   FORMAT_NONE,        GetNone,                2,  2048 }, // IA Palette
+    // 32-bit
+    {   FORMAT_RGBA8888,    GetRGBA8888_RGBA8888,   2,  1024 }, // RGBA
+    {   FORMAT_NONE,        GetNone,                2,  1024 }, // YUV
+    {   FORMAT_NONE,        GetNone,                2,  1024 }, // CI
+    {   FORMAT_NONE,        GetNone,                2,  1024 }, // IA
+    {   FORMAT_NONE,        GetNone,                2,  1024 }, // I
+    {   FORMAT_NONE,        GetNone,                2,  1024 }, // IA Palette
+};
+
+
+// Active conversion table; TextureCache_Init re-points it according to
+// config.texture.useIA.
+TextureFormat *textureFormat = textureFormatIA;
+
+void __texture_format_rgba(int size, int format, TextureFormat *texFormat)
+{
+    if (size < G_IM_SIZ_16b)
+    {
+        if (gDP.otherMode.textureLUT == G_TT_NONE)
+            *texFormat = textureFormat[size*6 + G_IM_FMT_I];
+        else if (gDP.otherMode.textureLUT == G_TT_RGBA16)
+            *texFormat = textureFormat[size*6 + G_IM_FMT_CI];
+        else
+            *texFormat = textureFormat[size*6 + G_IM_FMT_IA];
+    }
+    else
+    {
+        *texFormat = textureFormat[size*6 + G_IM_FMT_RGBA];
+    }
+}
+
+void __texture_format_ci(int size, int format, TextureFormat *texFormat)
+{
+    switch(size)
+    {
+        case G_IM_SIZ_4b:
+            if (gDP.otherMode.textureLUT == G_TT_IA16)
+                *texFormat = textureFormat[G_IM_SIZ_4b*6 + G_IM_FMT_CI_IA];
+            else
+                *texFormat = textureFormat[G_IM_SIZ_4b*6 + G_IM_FMT_CI];
+            break;
+
+        case G_IM_SIZ_8b:
+            if (gDP.otherMode.textureLUT == G_TT_NONE)
+                *texFormat = textureFormat[G_IM_SIZ_8b*6 + G_IM_FMT_I];
+            else if (gDP.otherMode.textureLUT == G_TT_IA16)
+                *texFormat = textureFormat[G_IM_SIZ_8b*6 + G_IM_FMT_CI_IA];
+            else
+                *texFormat = textureFormat[G_IM_SIZ_8b*6 + G_IM_FMT_CI];
+            break;
+
+        default:
+            *texFormat = textureFormat[size*6 + format];
+    }
+}
+
+void __texture_format(int size, int format, TextureFormat *texFormat)
+{
+    if (format == G_IM_FMT_RGBA)
+    {
+        __texture_format_rgba(size, format, texFormat);
+    }
+    else if (format == G_IM_FMT_YUV)
+    {
+        *texFormat = textureFormat[size*6 + G_IM_FMT_YUV];
+    }
+    else if (format == G_IM_FMT_CI)
+    {
+        __texture_format_ci(size, format, texFormat);
+    }
+    else if (format == G_IM_FMT_IA)
+    {
+        if (gDP.otherMode.textureLUT != G_TT_NONE)
+            __texture_format_ci(size, format, texFormat);
+        else
+            *texFormat = textureFormat[size*6 + G_IM_FMT_IA];
+    }
+    else if (format == G_IM_FMT_I)
+    {
+        if (gDP.otherMode.textureLUT == G_TT_NONE)
+            *texFormat = textureFormat[size*6 + G_IM_FMT_I];
+        else
+            __texture_format_ci(size, format, texFormat);
+    }
+}
+
+
+// Set to 1 by TextureCache_Init; nothing in the visible code resets it.
+int isTexCacheInit = 0;
+
+// Initialises the global texture cache: resets the list and counters, picks
+// the conversion table, creates 32 random noise textures and a 4x4 dummy
+// texture, binds the dummy to both texture units and builds the CRC table.
+// Issues GL calls, so a GL context must be current.
+void TextureCache_Init()
+{
+    // 4x4 texels of zero, uploaded as the dummy texture below
+    u32 dummyTexture[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+
+    isTexCacheInit = 1;
+    cache.current[0] = NULL;
+    cache.current[1] = NULL;
+    cache.top = NULL;
+    cache.bottom = NULL;
+    cache.numCached = 0;
+    cache.cachedBytes = 0;
+
+#ifdef __HASHMAP_OPT
+    cache.hash.init(11);
+#endif
+
+    if (config.texture.useIA) textureFormat = textureFormatIA;
+    else textureFormat = textureFormatRGBA;
+
+    // converted rows are tightly packed, so drop GL's default 4-byte row alignment
+    glPixelStorei(GL_PACK_ALIGNMENT, 1);
+    glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
+    glGenTextures( 32, cache.glNoiseNames );
+
+    srand(time(NULL));
+    u8 noise[64*64*2];
+    for (u32 i = 0; i < 32; i++)
+    {
+        glBindTexture( GL_TEXTURE_2D, cache.glNoiseNames[i] );
+        for (u32 y = 0; y < 64; y++)
+        {
+            for (u32 x = 0; x < 64; x++)
+            {
+                // same random byte for luminance and alpha
+                u32 r = (rand()&0xFF);
+                noise[y*64*2+x*2] = r;
+                noise[y*64*2+x*2+1] = r;
+            }
+        }
+        glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE_ALPHA, 64, 64, 0, GL_LUMINANCE_ALPHA, GL_UNSIGNED_BYTE, noise);
+    }
+
+    // the dummy is an ordinary cache node; cachedBytes is 0 here, so AddTop
+    // does not enter its eviction loop (which reads cache.dummy)
+    cache.dummy = TextureCache_AddTop();
+    cache.dummy->address = 0;
+    cache.dummy->clampS = 1;
+    cache.dummy->clampT = 1;
+    cache.dummy->clampWidth = 4;
+    cache.dummy->clampHeight = 4;
+    cache.dummy->crc = 0;
+    cache.dummy->format = 0;
+    cache.dummy->size = 0;
+    cache.dummy->width = 4;
+    cache.dummy->height = 4;
+    cache.dummy->realWidth = 0;
+    cache.dummy->realHeight = 0;
+    cache.dummy->maskS = 0;
+    cache.dummy->maskT = 0;
+    cache.dummy->scaleS = 0.5f;
+    cache.dummy->scaleT = 0.5f;
+    cache.dummy->shiftScaleS = 1.0f;
+    cache.dummy->shiftScaleT = 1.0f;
+    cache.dummy->textureBytes = 64;   // 4 * 4 texels * 4 bytes
+    cache.dummy->tMem = 0;
+
+    glBindTexture( GL_TEXTURE_2D, cache.dummy->glName );
+    glTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA, 4, 4, 0, GL_RGBA, GL_UNSIGNED_BYTE, dummyTexture);
+
+    cache.cachedBytes = cache.dummy->textureBytes;
+    TextureCache_ActivateDummy(0);
+    TextureCache_ActivateDummy(1);
+    CRC_BuildTable();
+}
+
+bool TextureCache_Verify()
+{
+    u16 i = 0;
+    CachedTexture *current;
+
+    current = cache.top;
+
+    while (current)
+    {
+        i++;
+        current = current->lower;
+    }
+    if (i != cache.numCached) return false;
+
+    i = 0;
+    current = cache.bottom;
+    while (current)
+    {
+        i++;
+        current = current->higher;
+    }
+    if (i != cache.numCached) return false;
+
+    return true;
+}
+
+void TextureCache_RemoveBottom()
+{
+    CachedTexture *newBottom = cache.bottom->higher;
+
+#ifdef __HASHMAP_OPT
+    CachedTexture* tex= cache.hash.find(cache.bottom->crc);
+    if (tex == cache.bottom)
+        cache.hash.insert(cache.bottom->crc, NULL);
+#endif
+
+    glDeleteTextures( 1, &cache.bottom->glName );
+    cache.cachedBytes -= cache.bottom->textureBytes;
+
+    if (cache.bottom == cache.top)
+        cache.top = NULL;
+
+    free( cache.bottom );
+
+    cache.bottom = newBottom;
+
+    if (cache.bottom)
+        cache.bottom->lower = NULL;
+
+    cache.numCached--;
+}
+
+void TextureCache_Remove( CachedTexture *texture )
+{
+    if ((texture == cache.bottom) && (texture == cache.top))
+    {
+        cache.top = NULL;
+        cache.bottom = NULL;
+    }
+    else if (texture == cache.bottom)
+    {
+        cache.bottom = texture->higher;
+
+        if (cache.bottom)
+            cache.bottom->lower = NULL;
+    }
+    else if (texture == cache.top)
+    {
+        cache.top = texture->lower;
+
+        if (cache.top)
+            cache.top->higher = NULL;
+    }
+    else
+    {
+        texture->higher->lower = texture->lower;
+        texture->lower->higher = texture->higher;
+    }
+
+#ifdef __HASHMAP_OPT
+    CachedTexture* tex= cache.hash.find(texture->crc);
+    if (tex == texture);
+        cache.hash.insert(texture->crc, NULL);
+#endif
+
+    glDeleteTextures( 1, &texture->glName );
+    cache.cachedBytes -= texture->textureBytes;
+    free( texture );
+
+    cache.numCached--;
+}
+
+CachedTexture *TextureCache_AddTop()
+{
+    while (cache.cachedBytes > TEXTURECACHE_MAX)
+    {
+        if (cache.bottom != cache.dummy)
+            TextureCache_RemoveBottom();
+        else if (cache.dummy->higher)
+            TextureCache_Remove( cache.dummy->higher );
+    }
+
+    CachedTexture *newtop = (CachedTexture*)malloc( sizeof( CachedTexture ) );
+
+    glGenTextures( 1, &newtop->glName );
+
+    newtop->lower = cache.top;
+    newtop->higher = NULL;
+
+    if (cache.top)
+        cache.top->higher = newtop;
+
+    if (!cache.bottom)
+        cache.bottom = newtop;
+
+    cache.top = newtop;
+
+    cache.numCached++;
+
+    return newtop;
+}
+
+void TextureCache_MoveToTop( CachedTexture *newtop )
+{
+    if (newtop == cache.top) return;
+
+    if (newtop == cache.bottom)
+    {
+        cache.bottom = newtop->higher;
+        cache.bottom->lower = NULL;
+    }
+    else
+    {
+        newtop->higher->lower = newtop->lower;
+        newtop->lower->higher = newtop->higher;
+    }
+
+    newtop->higher = NULL;
+    newtop->lower = cache.top;
+    cache.top->higher = newtop;
+    cache.top = newtop;
+}
+
+// Tears the texture cache down: deletes every cached texture (including the
+// dummy), the noise textures and the hash map, then clears the cache's
+// pointers so nothing refers to freed nodes.
+void TextureCache_Destroy()
+{
+    // The dummy was created with TextureCache_AddTop, so it is part of the
+    // list: this loop deletes its GL name and frees its node as well.
+    while (cache.bottom)
+        TextureCache_RemoveBottom();
+
+    glDeleteTextures( 32, cache.glNoiseNames );
+
+#ifdef __HASHMAP_OPT
+    cache.hash.destroy();
+#endif
+
+    // cache.dummy->glName used to be read here after the node had already
+    // been freed above (use-after-free). Drop the dangling pointers instead.
+    cache.dummy = NULL;
+    cache.current[0] = NULL;
+    cache.current[1] = NULL;
+    cache.top = NULL;
+    cache.bottom = NULL;
+}
+
+
+
+void TextureCache_LoadBackground( CachedTexture *texInfo )
+{
+    u32 *dest, *scaledDest;
+    u8 *swapped, *src;
+    u32 numBytes, bpl;
+    u32 x, y, j, tx, ty;
+    u16 clampSClamp,  clampTClamp;
+
+    int bytePerPixel=0;
+    TextureFormat   texFormat;
+    GetTexelFunc    getTexel;
+    GLint glWidth=0, glHeight=0;
+    GLenum glType=0;
+    GLenum glFormat=0;
+
+    __texture_format(texInfo->size, texInfo->format, &texFormat);
+
+#ifdef PRINT_TEXTUREFORMAT
+    printf("BG LUT=%i, TEXTURE SIZE=%i, FORMAT=%i -> GL FORMAT=%i\n", gDP.otherMode.textureLUT, texInfo->size, texInfo->format, texFormat.format); fflush(stdout);
+#endif
+
+    if (texFormat.format == FORMAT_NONE)
+    {
+        LOG(LOG_WARNING, "No Texture Conversion function available, size=%i format=%i\n", texInfo->size, texInfo->format);
+    }
+
+    switch(texFormat.format)
+    {
+        case FORMAT_I8:
+            glFormat = GL_LUMINANCE;
+            glType = GL_UNSIGNED_BYTE;
+            bytePerPixel = 1;
+            break;
+        case FORMAT_IA88:
+            glFormat = GL_LUMINANCE_ALPHA;
+            glType = GL_UNSIGNED_BYTE;
+            bytePerPixel = 2;
+            break;
+        case FORMAT_RGBA4444:
+            glFormat = GL_RGBA;
+            glType = GL_UNSIGNED_SHORT_4_4_4_4;
+            bytePerPixel = 2;
+            break;
+        case FORMAT_RGBA5551:
+            glFormat = GL_RGBA;
+            glType = GL_UNSIGNED_SHORT_5_5_5_1;
+            bytePerPixel = 2;
+            break;
+        case FORMAT_RGBA8888:
+            glFormat = GL_RGBA;
+            glType = GL_UNSIGNED_BYTE;
+            bytePerPixel = 4;
+            break;
+    }
+
+    glWidth = texInfo->realWidth;
+    glHeight = texInfo->realHeight;
+    texInfo->textureBytes = (glWidth * glHeight) * bytePerPixel;
+    getTexel = texFormat.getTexel;
+
+    bpl = gSP.bgImage.width << gSP.bgImage.size >> 1;
+    numBytes = bpl * gSP.bgImage.height;
+    swapped = (u8*) malloc(numBytes);
+    dest = (u32*) malloc(texInfo->textureBytes);
+
+    if (!dest || !swapped)
+    {
+        LOG(LOG_ERROR, "Malloc failed!\n");
+        return;
+    }
+
+    UnswapCopy(&RDRAM[gSP.bgImage.address], swapped, numBytes);
+
+    clampSClamp = texInfo->width - 1;
+    clampTClamp = texInfo->height - 1;
+
+    j = 0;
+    for (y = 0; y < texInfo->realHeight; y++)
+    {
+        ty = min(y, clampTClamp);
+        src = &swapped[bpl * ty];
+        for (x = 0; x < texInfo->realWidth; x++)
+        {
+            tx = min(x, clampSClamp);
+            if (bytePerPixel == 4)
+                ((u32*)dest)[j++] = getTexel(src, tx, 0, texInfo->palette);
+            else if (bytePerPixel == 2)
+                ((u16*)dest)[j++] = getTexel(src, tx, 0, texInfo->palette);
+            else if (bytePerPixel == 1)
+                ((u8*)dest)[j++] = getTexel(src, tx, 0, texInfo->palette);
+        }
+    }
+
+    if (!config.texture.sai2x || (texFormat.format == FORMAT_I8 || texFormat.format == FORMAT_IA88))
+    {
+        glTexImage2D( GL_TEXTURE_2D, 0, glFormat, glWidth, glHeight, 0, glFormat, glType, dest);
+    }
+    else
+    {
+        LOG(LOG_VERBOSE, "Using 2xSAI Filter on Texture\n");
+        texInfo->textureBytes <<= 2;
+
+        scaledDest = (u32*) malloc( texInfo->textureBytes );
+
+        if (glType == GL_UNSIGNED_BYTE)
+            _2xSaI8888( (u32*)dest, (u32*)scaledDest, texInfo->realWidth, texInfo->realHeight, texInfo->clampS, texInfo->clampT );
+        if (glType == GL_UNSIGNED_SHORT_4_4_4_4)
+            _2xSaI4444( (u16*)dest, (u16*)scaledDest, texInfo->realWidth, texInfo->realHeight, texInfo->clampS, texInfo->clampT );
+        else
+            _2xSaI5551( (u16*)dest, (u16*)scaledDest, texInfo->realWidth, texInfo->realHeight, texInfo->clampS, texInfo->clampT );
+
+        glTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA, texInfo->realWidth << 1, texInfo->realHeight << 1, 0, GL_RGBA, glType, scaledDest );
+
+        free( scaledDest );
+    }
+
+    free(dest);
+    free(swapped);
+
+
+    if (config.texture.enableMipmap)
+        glGenerateMipmap(GL_TEXTURE_2D);
+}
+
+void TextureCache_Load( CachedTexture *texInfo )
+{
+    u32 *dest, *scaledDest;
+
+    void *src;
+    u16 x, y, i, j, tx, ty, line;
+    u16 mirrorSBit, maskSMask, clampSClamp;
+    u16 mirrorTBit, maskTMask, clampTClamp;
+
+    int bytePerPixel=0;
+    TextureFormat   texFormat;
+    GetTexelFunc    getTexel;
+    GLint glWidth=0, glHeight=0;
+    GLenum glType=0;
+    GLenum glFormat=0;
+
+    __texture_format(texInfo->size, texInfo->format, &texFormat);
+
+#ifdef PRINT_TEXTUREFORMAT
+    printf("TEX LUT=%i, TEXTURE SIZE=%i, FORMAT=%i -> GL FORMAT=%i\n", gDP.otherMode.textureLUT, texInfo->size, texInfo->format, texFormat.format); fflush(stdout);
+#endif
+
+    if (texFormat.format == FORMAT_NONE)
+    {
+        LOG(LOG_WARNING, "No Texture Conversion function available, size=%i format=%i\n", texInfo->size, texInfo->format);
+    }
+
+    switch(texFormat.format)
+    {
+        case FORMAT_I8:
+            glFormat = GL_LUMINANCE;
+            glType = GL_UNSIGNED_BYTE;
+            bytePerPixel = 1;
+            break;
+        case FORMAT_IA88:
+            glFormat = GL_LUMINANCE_ALPHA;
+            glType = GL_UNSIGNED_BYTE;
+            bytePerPixel = 2;
+            break;
+        case FORMAT_RGBA4444:
+            glFormat = GL_RGBA;
+            glType = GL_UNSIGNED_SHORT_4_4_4_4;
+            bytePerPixel = 2;
+            break;
+        case FORMAT_RGBA5551:
+            glFormat = GL_RGBA;
+            glType = GL_UNSIGNED_SHORT_5_5_5_1;
+            bytePerPixel = 2;
+            break;
+        case FORMAT_RGBA8888:
+            glFormat = GL_RGBA;
+            glType = GL_UNSIGNED_BYTE;
+            bytePerPixel = 4;
+            break;
+    }
+
+    glWidth = texInfo->realWidth;
+    glHeight = texInfo->realHeight;
+    texInfo->textureBytes = (glWidth * glHeight) * bytePerPixel;
+    getTexel = texFormat.getTexel;
+
+    dest = (u32*)malloc(texInfo->textureBytes);
+
+    if (!dest)
+    {
+        LOG(LOG_ERROR, "Malloc failed!\n");
+        return;
+    }
+
+
+    line = texInfo->line;
+
+    if (texInfo->size == G_IM_SIZ_32b)
+        line <<= 1;
+
+    if (texInfo->maskS)
+    {
+        clampSClamp = texInfo->clampS ? texInfo->clampWidth - 1 : (texInfo->mirrorS ? (texInfo->width << 1) - 1 : texInfo->width - 1);
+        maskSMask = (1 << texInfo->maskS) - 1;
+        mirrorSBit = texInfo->mirrorS ? (1 << texInfo->maskS) : 0;
+    }
+    else
+    {
+        clampSClamp = min( texInfo->clampWidth, texInfo->width ) - 1;
+        maskSMask = 0xFFFF;
+        mirrorSBit = 0x0000;
+    }
+
+    if (texInfo->maskT)
+    {
+        clampTClamp = texInfo->clampT ? texInfo->clampHeight - 1 : (texInfo->mirrorT ? (texInfo->height << 1) - 1: texInfo->height - 1);
+        maskTMask = (1 << texInfo->maskT) - 1;
+        mirrorTBit = texInfo->mirrorT ? (1 << texInfo->maskT) : 0;
+    }
+    else
+    {
+        clampTClamp = min( texInfo->clampHeight, texInfo->height ) - 1;
+        maskTMask = 0xFFFF;
+        mirrorTBit = 0x0000;
+    }
+
+    // Hack for Zelda warp texture
+    if (((texInfo->tMem << 3) + (texInfo->width * texInfo->height << texInfo->size >> 1)) > 4096)
+    {
+        texInfo->tMem = 0;
+    }
+
+    // limit clamp values to min-0 (Perfect Dark has height=0 textures, making negative clamps)
+    if (clampTClamp & 0x8000) clampTClamp = 0;
+    if (clampSClamp & 0x8000) clampSClamp = 0;
+
+    j = 0;
+    for (y = 0; y < texInfo->realHeight; y++)
+    {
+        ty = min(y, clampTClamp) & maskTMask;
+        if (y & mirrorTBit) ty ^= maskTMask;
+        src = &TMEM[(texInfo->tMem + line * ty) & 511];
+        i = (ty & 1) << 1;
+        for (x = 0; x < texInfo->realWidth; x++)
+        {
+            tx = min(x, clampSClamp) & maskSMask;
+
+            if (x & mirrorSBit) tx ^= maskSMask;
+
+            if (bytePerPixel == 4)
+            {
+                ((u32*)dest)[j] = getTexel(src, tx, i, texInfo->palette);
+            }
+            else if (bytePerPixel == 2)
+            {
+                ((u16*)dest)[j] = getTexel(src, tx, i, texInfo->palette);
+            }
+            else if (bytePerPixel == 1)
+            {
+                ((u8*)dest)[j] = getTexel(src, tx, i, texInfo->palette);
+            }
+            j++;
+        }
+    }
+
+    if (!config.texture.sai2x || (texFormat.format == FORMAT_I8) || (texFormat.format == FORMAT_IA88))
+    {
+#ifdef PRINT_TEXTUREFORMAT
+        printf("j=%u DEST=0x%x SIZE=%i F=0x%x, W=%i, H=%i, T=0x%x\n", j, dest, texInfo->textureBytes,glFormat, glWidth, glHeight, glType); fflush(stdout);
+#endif
+        glTexImage2D( GL_TEXTURE_2D, 0, glFormat, glWidth, glHeight, 0, glFormat, glType, dest);
+    }
+    else
+    {
+        LOG(LOG_VERBOSE, "Using 2xSAI Filter on Texture\n");
+
+        texInfo->textureBytes <<= 2;
+
+        scaledDest = (u32*)malloc( texInfo->textureBytes );
+
+        if (glType == GL_UNSIGNED_BYTE)
+            _2xSaI8888( (u32*)dest, (u32*)scaledDest, texInfo->realWidth, texInfo->realHeight, 1, 1 );
+        else if (glType == GL_UNSIGNED_SHORT_4_4_4_4)
+            _2xSaI4444( (u16*)dest, (u16*)scaledDest, texInfo->realWidth, texInfo->realHeight, 1, 1 );
+        else
+            _2xSaI5551( (u16*)dest, (u16*)scaledDest, texInfo->realWidth, texInfo->realHeight, 1, 1 );
+
+        glTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA, texInfo->realWidth << 1, texInfo->realHeight << 1, 0, GL_RGBA, glType, scaledDest );
+
+        free( scaledDest );
+    }
+
+    free(dest);
+
+    if (config.texture.enableMipmap)
+        glGenerateMipmap(GL_TEXTURE_2D);
+
+}
+
+// Function-like max(); each argument may be evaluated twice, so do not pass
+// expressions with side effects. Unlike min above, it is not wrapped in #ifndef.
+#define max(a,b) ((a) > (b) ? (a) : (b))
+
+u32 TextureCache_CalculateCRC( u32 t, u32 width, u32 height )
+{
+    u32 crc;
+    u32 y, /*i,*/ bpl, lineBytes, line;
+    void *src;
+
+    bpl = width << gSP.textureTile[t]->size >> 1;
+    lineBytes = gSP.textureTile[t]->line << 3;
+
+    line = gSP.textureTile[t]->line;
+    if (gSP.textureTile[t]->size == G_IM_SIZ_32b)
+        line <<= 1;
+
+    crc = 0xFFFFFFFF;
+
+#ifdef __CRC_OPT
+    unsigned n = (config.texture.fastCRC) ? max(1, height / 8) : 1;
+#else
+    unsigned n = 1;
+#endif
+
+    for (y = 0; y < height; y += n)
+    {
+        src = (void*) &TMEM[(gSP.textureTile[t]->tmem + (y * line)) & 511];
+        crc = CRC_Calculate( crc, src, bpl );
+    }
+
+    if (gSP.textureTile[t]->format == G_IM_FMT_CI)
+    {
+        if (gSP.textureTile[t]->size == G_IM_SIZ_4b)
+            crc = CRC_Calculate( crc, &gDP.paletteCRC16[gSP.textureTile[t]->palette], 4 );
+        else if (gSP.textureTile[t]->size == G_IM_SIZ_8b)
+            crc = CRC_Calculate( crc, &gDP.paletteCRC256, 4 );
+    }
+    return crc;
+}
+
+void TextureCache_ActivateTexture( u32 t, CachedTexture *texture )
+{
+
+#ifdef __HASHMAP_OPT
+    cache.hash.insert(texture->crc, texture);
+#endif
+
+    glActiveTexture( GL_TEXTURE0 + t );
+    glBindTexture( GL_TEXTURE_2D, texture->glName );
+
+    // Set filter mode. Almost always bilinear, but check anyways
+    if ((gDP.otherMode.textureFilter == G_TF_BILERP) || (gDP.otherMode.textureFilter == G_TF_AVERAGE) || (config.texture.forceBilinear))
+    {
+        glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR );
+        glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR );
+    }
+    else
+    {
+        glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST );
+        glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST );
+    }
+
+    // Set clamping modes
+    glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, (texture->clampS) ? GL_CLAMP_TO_EDGE : GL_REPEAT );
+    glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, (texture->clampT) ? GL_CLAMP_TO_EDGE : GL_REPEAT );
+
+    if (config.texture.maxAnisotropy > 0)
+    {
+        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, config.texture.maxAnisotropy);
+    }
+
+    texture->lastDList = RSP.DList;
+    TextureCache_MoveToTop( texture );
+    cache.current[t] = texture;
+}
+
+// Binds the dummy texture to texture unit t with nearest-neighbour filtering.
+// Does not touch cache.current[t] or the cache list order.
+void TextureCache_ActivateDummy( u32 t)
+{
+    const GLint filter = GL_NEAREST;
+
+    glActiveTexture(GL_TEXTURE0 + t);
+    glBindTexture(GL_TEXTURE_2D, cache.dummy->glName );
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, filter);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, filter);
+}
+
+int _background_compare(CachedTexture *current, u32 crc)
+{
+    if ((current != NULL) &&
+        (current->crc == crc) &&
+        (current->width == gSP.bgImage.width) &&
+        (current->height == gSP.bgImage.height) &&
+        (current->format == gSP.bgImage.format) &&
+        (current->size == gSP.bgImage.size))
+        return 1;
+    else
+        return 0;
+}
+
+void TextureCache_UpdateBackground()
+{
+    u32 numBytes = gSP.bgImage.width * gSP.bgImage.height << gSP.bgImage.size >> 1;
+    u32 crc;
+
+    crc = CRC_Calculate( 0xFFFFFFFF, &RDRAM[gSP.bgImage.address], numBytes );
+
+    if (gSP.bgImage.format == G_IM_FMT_CI)
+    {
+        if (gSP.bgImage.size == G_IM_SIZ_4b)
+            crc = CRC_Calculate( crc, &gDP.paletteCRC16[gSP.bgImage.palette], 4 );
+        else if (gSP.bgImage.size == G_IM_SIZ_8b)
+            crc = CRC_Calculate( crc, &gDP.paletteCRC256, 4 );
+    }
+
+    //before we traverse cache, check to see if texture is already bound:
+    if (_background_compare(cache.current[0], crc))
+    {
+        return;
+    }
+
+#ifdef __HASHMAP_OPT
+    CachedTexture *tex = cache.hash.find(crc);
+    if (tex)
+    {
+        if (_background_compare(tex, crc))
+        {
+            TextureCache_ActivateTexture(0, tex);
+            cache.hits++;
+            return;
+        }
+    }
+#endif
+
+    CachedTexture *current = cache.top;
+    while (current)
+    {
+        if (_background_compare(current, crc))
+        {
+            TextureCache_ActivateTexture( 0, current );
+            cache.hits++;
+            return;
+        }
+        current = current->lower;
+    }
+    cache.misses++;
+
+    glActiveTexture(GL_TEXTURE0);
+    cache.current[0] = TextureCache_AddTop();
+
+    glBindTexture( GL_TEXTURE_2D, cache.current[0]->glName );
+    cache.current[0]->address = gSP.bgImage.address;
+    cache.current[0]->crc = crc;
+    cache.current[0]->format = gSP.bgImage.format;
+    cache.current[0]->size = gSP.bgImage.size;
+    cache.current[0]->width = gSP.bgImage.width;
+    cache.current[0]->height = gSP.bgImage.height;
+    cache.current[0]->clampWidth = gSP.bgImage.width;
+    cache.current[0]->clampHeight = gSP.bgImage.height;
+    cache.current[0]->palette = gSP.bgImage.palette;
+    cache.current[0]->maskS = 0;
+    cache.current[0]->maskT = 0;
+    cache.current[0]->mirrorS = 0;
+    cache.current[0]->mirrorT = 0;
+    cache.current[0]->clampS = 1;
+    cache.current[0]->clampT = 1;
+    cache.current[0]->line = 0;
+    cache.current[0]->tMem = 0;
+    cache.current[0]->lastDList = RSP.DList;
+
+    cache.current[0]->realWidth = (config.texture.pow2) ? pow2(gSP.bgImage.width ) : gSP.bgImage.width;
+    cache.current[0]->realHeight = (config.texture.pow2) ? pow2(gSP.bgImage.height) : gSP.bgImage.height;
+
+    cache.current[0]->scaleS = 1.0f / (f32)(cache.current[0]->realWidth);
+    cache.current[0]->scaleT = 1.0f / (f32)(cache.current[0]->realHeight);
+    cache.current[0]->shiftScaleS = 1.0f;
+    cache.current[0]->shiftScaleT = 1.0f;
+
+    TextureCache_LoadBackground( cache.current[0] );
+    TextureCache_ActivateTexture( 0, cache.current[0] );
+
+    cache.cachedBytes += cache.current[0]->textureBytes;
+}
+
+// Returns 1 when the cache entry `current` can be reused for tile `t`, 0 otherwise.
+// A NULL entry never matches. crc, width, height, clampWidth and clampHeight are
+// supplied by the caller; mask, mirror, clamp, format and size are compared against
+// the current contents of gSP.textureTile[t].
+int _texture_compare(u32 t, CachedTexture *current, u32 crc,  u32 width, u32 height, u32 clampWidth, u32 clampHeight)
+{
+    if (current == NULL)
+        return 0;
+
+    // Values computed by the caller for this lookup.
+    if (current->crc != crc)
+        return 0;
+    if ((current->width != width) || (current->height != height))
+        return 0;
+    if ((current->clampWidth != clampWidth) || (current->clampHeight != clampHeight))
+        return 0;
+
+    // Sampling state of the tile that is about to be bound.
+    if ((current->maskS != gSP.textureTile[t]->masks) || (current->maskT != gSP.textureTile[t]->maskt))
+        return 0;
+    if ((current->mirrorS != gSP.textureTile[t]->mirrors) || (current->mirrorT != gSP.textureTile[t]->mirrort))
+        return 0;
+    if ((current->clampS != gSP.textureTile[t]->clamps) || (current->clampT != gSP.textureTile[t]->clampt))
+        return 0;
+
+    // Texel layout.
+    if ((current->format != gSP.textureTile[t]->format) || (current->size != gSP.textureTile[t]->size))
+        return 0;
+
+    return 1;
+}
+
+
+// Makes sure the texture described by tile t is bound to texture unit t.
+//
+// In TEXTUREMODE_BGIMAGE the work is delegated to TextureCache_UpdateBackground().
+// Otherwise the texture dimensions are derived from the tile, mask, load-tile and
+// line settings, a CRC is computed, and the cache is searched: the entry currently
+// bound to unit t first, then (with __HASHMAP_OPT) the hash map, then the list
+// starting at cache.top. On a miss a new entry is obtained from
+// TextureCache_AddTop(), filled in, loaded and activated.
+//
+// Side effects visible here: clamps/clampt/masks/maskt of gSP.textureTile[t] may be
+// rewritten, and cache.hits / cache.misses / cache.cachedBytes are updated.
+void TextureCache_Update( u32 t )
+{
+    CachedTexture *current;
+
+    u32 crc, maxTexels;
+    u32 tileWidth, maskWidth, loadWidth, lineWidth, clampWidth, height;
+    u32 tileHeight, maskHeight, loadHeight, lineHeight, clampHeight, width;
+
+    if (gDP.textureMode == TEXTUREMODE_BGIMAGE)
+    {
+        TextureCache_UpdateBackground();
+        return;
+    }
+
+    TextureFormat texFormat;
+    __texture_format(gSP.textureTile[t]->size, gSP.textureTile[t]->format, &texFormat);
+
+    maxTexels = texFormat.maxTexels;
+
+    // Here comes a bunch of code that just calculates the texture size...I wish there was an easier way...
+    tileWidth = gSP.textureTile[t]->lrs - gSP.textureTile[t]->uls + 1;
+    tileHeight = gSP.textureTile[t]->lrt - gSP.textureTile[t]->ult + 1;
+
+    maskWidth = 1 << gSP.textureTile[t]->masks;
+    maskHeight = 1 << gSP.textureTile[t]->maskt;
+
+    loadWidth = gDP.loadTile->lrs - gDP.loadTile->uls + 1;
+    loadHeight = gDP.loadTile->lrt - gDP.loadTile->ult + 1;
+
+    lineWidth = gSP.textureTile[t]->line << texFormat.lineShift;
+
+    if (lineWidth) // Don't allow division by zero
+        lineHeight = min( maxTexels / lineWidth, tileHeight );
+    else
+        lineHeight = 0;
+
+    // Width and height are chosen by the same cascade: the first candidate whose
+    // texel count fits in maxTexels wins, falling back to load-tile or line size.
+    if (gDP.textureMode == TEXTUREMODE_TEXRECT)
+    {
+        u32 texRectWidth = gDP.texRect.width - gSP.textureTile[t]->uls;
+        u32 texRectHeight = gDP.texRect.height - gSP.textureTile[t]->ult;
+
+        if (gSP.textureTile[t]->masks && ((maskWidth * maskHeight) <= maxTexels))
+            width = maskWidth;
+        else if ((tileWidth * tileHeight) <= maxTexels)
+            width = tileWidth;
+        else if ((tileWidth * texRectHeight) <= maxTexels)
+            width = tileWidth;
+        else if ((texRectWidth * tileHeight) <= maxTexels)
+            width = gDP.texRect.width;
+        else if ((texRectWidth * texRectHeight) <= maxTexels)
+            width = gDP.texRect.width;
+        else if (gDP.loadType == LOADTYPE_TILE)
+            width = loadWidth;
+        else
+            width = lineWidth;
+
+        if (gSP.textureTile[t]->maskt && ((maskWidth * maskHeight) <= maxTexels))
+            height = maskHeight;
+        else if ((tileWidth * tileHeight) <= maxTexels)
+            height = tileHeight;
+        else if ((tileWidth * texRectHeight) <= maxTexels)
+            height = gDP.texRect.height;
+        else if ((texRectWidth * tileHeight) <= maxTexels)
+            height = tileHeight;
+        else if ((texRectWidth * texRectHeight) <= maxTexels)
+            height = gDP.texRect.height;
+        else if (gDP.loadType == LOADTYPE_TILE)
+            height = loadHeight;
+        else
+            height = lineHeight;
+    }
+    else
+    {
+        if (gSP.textureTile[t]->masks && ((maskWidth * maskHeight) <= maxTexels))
+            width = maskWidth;
+        else if ((tileWidth * tileHeight) <= maxTexels)
+            width = tileWidth;
+        else if (gDP.loadType == LOADTYPE_TILE)
+            width = loadWidth;
+        else
+            width = lineWidth;
+
+        if (gSP.textureTile[t]->maskt && ((maskWidth * maskHeight) <= maxTexels))
+            height = maskHeight;
+        else if ((tileWidth * tileHeight) <= maxTexels)
+            height = tileHeight;
+        else if (gDP.loadType == LOADTYPE_TILE)
+            height = loadHeight;
+        else
+            height = lineHeight;
+    }
+
+    clampWidth = gSP.textureTile[t]->clamps ? tileWidth : width;
+    clampHeight = gSP.textureTile[t]->clampt ? tileHeight : height;
+
+    // Clamping is switched off on the tile itself when the clamp size exceeds 256.
+    if (clampWidth > 256)
+        gSP.textureTile[t]->clamps = 0;
+    if (clampHeight > 256)
+        gSP.textureTile[t]->clampt = 0;
+
+    // Make sure masking is valid
+    if (maskWidth > width)
+    {
+        gSP.textureTile[t]->masks = powof( width );
+        maskWidth = 1 << gSP.textureTile[t]->masks;
+    }
+
+    if (maskHeight > height)
+    {
+        gSP.textureTile[t]->maskt = powof( height );
+        maskHeight = 1 << gSP.textureTile[t]->maskt;
+    }
+
+    crc = TextureCache_CalculateCRC( t, width, height );
+
+    //before we traverse cache, check to see if texture is already bound:
+    if (_texture_compare(t, cache.current[t], crc, width, height, clampWidth, clampHeight))
+    {
+        cache.hits++;
+        return;
+    }
+
+#ifdef __HASHMAP_OPT
+    CachedTexture *tex = cache.hash.find(crc);
+    if (tex)
+    {
+        if (_texture_compare(t, tex, crc, width, height, clampWidth, clampHeight))
+        {
+            TextureCache_ActivateTexture( t, tex);
+            cache.hits++;
+            return;
+        }
+    }
+#endif
+
+    // Linear search from the top of the cache list downwards.
+    current = cache.top;
+    while (current)
+    {
+        if  (_texture_compare(t, current, crc, width, height, clampWidth, clampHeight))
+        {
+            TextureCache_ActivateTexture( t, current );
+            cache.hits++;
+            return;
+        }
+
+        current = current->lower;
+    }
+
+    cache.misses++;
+
+    glActiveTexture( GL_TEXTURE0 + t);
+
+    // Only replace the bound entry once a new one really exists: every statement
+    // below dereferences it, so a NULL result must end the update here.
+    CachedTexture *fresh = TextureCache_AddTop();
+
+    if (fresh == NULL)
+    {
+        LOG(LOG_ERROR, "Texture Cache Failure\n");
+        return;
+    }
+
+    cache.current[t] = fresh;
+
+    glBindTexture( GL_TEXTURE_2D, cache.current[t]->glName );
+
+    cache.current[t]->address = gDP.textureImage.address;
+    cache.current[t]->crc = crc;
+
+    cache.current[t]->format = gSP.textureTile[t]->format;
+    cache.current[t]->size = gSP.textureTile[t]->size;
+
+    cache.current[t]->width = width;
+    cache.current[t]->height = height;
+    cache.current[t]->clampWidth = clampWidth;
+    cache.current[t]->clampHeight = clampHeight;
+    cache.current[t]->palette = gSP.textureTile[t]->palette;
+    cache.current[t]->maskS = gSP.textureTile[t]->masks;
+    cache.current[t]->maskT = gSP.textureTile[t]->maskt;
+    cache.current[t]->mirrorS = gSP.textureTile[t]->mirrors;
+    cache.current[t]->mirrorT = gSP.textureTile[t]->mirrort;
+    cache.current[t]->clampS = gSP.textureTile[t]->clamps;
+    cache.current[t]->clampT = gSP.textureTile[t]->clampt;
+    cache.current[t]->line = gSP.textureTile[t]->line;
+    cache.current[t]->tMem = gSP.textureTile[t]->tmem;
+    cache.current[t]->lastDList = RSP.DList;
+
+
+    // Real (GL) size: clamp size when clamping, twice the mask size when mirroring,
+    // otherwise the N64 size; rounded up to a power of two if config.texture.pow2.
+    if (cache.current[t]->clampS)
+        cache.current[t]->realWidth = (config.texture.pow2) ? pow2(clampWidth) : clampWidth;
+    else if (cache.current[t]->mirrorS)
+        cache.current[t]->realWidth = maskWidth << 1;
+    else
+        cache.current[t]->realWidth = (config.texture.pow2) ? pow2(width) : width;
+
+    if (cache.current[t]->clampT)
+        cache.current[t]->realHeight = (config.texture.pow2) ? pow2(clampHeight) : clampHeight;
+    else if (cache.current[t]->mirrorT)
+        cache.current[t]->realHeight = maskHeight << 1;
+    else
+        cache.current[t]->realHeight = (config.texture.pow2) ? pow2(height) : height;
+
+
+    cache.current[t]->scaleS = 1.0f / (f32)(cache.current[t]->realWidth);
+    cache.current[t]->scaleT = 1.0f / (f32)(cache.current[t]->realHeight);
+
+    // Hack for Zelda Sun
+    if ((config.hackZelda) && (gDP.combine.mux == 0x00262a60150c937fLL))
+    {
+        // The format must be compared, not assigned: an assignment here would
+        // overwrite the entry's format (so it never matches its tile again in
+        // _texture_compare) and make the test true for every texture.
+        if ((cache.current[t]->format == G_IM_FMT_I) && (cache.current[t]->size == G_IM_SIZ_8b) &&
+            (cache.current[t]->width == 64))
+        {
+            cache.current[t]->scaleS *= 0.5f;
+            cache.current[t]->scaleT *= 0.5f;
+        }
+    }
+
+    cache.current[t]->shiftScaleS = 1.0f;
+    cache.current[t]->shiftScaleT = 1.0f;
+
+    cache.current[t]->offsetS = config.texture.sai2x ? 0.25f : 0.5f;
+    cache.current[t]->offsetT = config.texture.sai2x ? 0.25f : 0.5f;
+
+    // Shift values above 10 scale up by 2^(16 - shift); values 1..10 scale down by 2^shift.
+    if (gSP.textureTile[t]->shifts > 10)
+        cache.current[t]->shiftScaleS = (f32)(1 << (16 - gSP.textureTile[t]->shifts));
+    else if (gSP.textureTile[t]->shifts > 0)
+        cache.current[t]->shiftScaleS /= (f32)(1 << gSP.textureTile[t]->shifts);
+
+    if (gSP.textureTile[t]->shiftt > 10)
+        cache.current[t]->shiftScaleT = (f32)(1 << (16 - gSP.textureTile[t]->shiftt));
+    else if (gSP.textureTile[t]->shiftt > 0)
+        cache.current[t]->shiftScaleT /= (f32)(1 << gSP.textureTile[t]->shiftt);
+
+    TextureCache_Load( cache.current[t] );
+    TextureCache_ActivateTexture( t, cache.current[t] );
+
+    cache.cachedBytes += cache.current[t]->textureBytes;
+}
+
+// Binds one of the 32 noise textures to texture unit t and sets both wrap modes
+// to GL_REPEAT. The texture is selected by the low five bits of RSP.DList.
+void TextureCache_ActivateNoise(u32 t)
+{
+    const GLuint noiseName = cache.glNoiseNames[RSP.DList & 0x1F];
+
+    glActiveTexture(GL_TEXTURE0 + t);
+    glBindTexture(GL_TEXTURE_2D, noiseName);
+
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT );
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT );
+}
+
diff --git a/source/gles2n64/src/Textures.h b/source/gles2n64/src/Textures.h
new file mode 100644 (file)
index 0000000..57ff50d
--- /dev/null
@@ -0,0 +1,91 @@
+#ifndef TEXTURES_H
+#define TEXTURES_H
+
+#include <GLES2/gl2.h>
+
+#include "Hash.h"
+#include "convert.h"
+
+// One entry of the texture cache. Entries are linked into a list through
+// lower/higher; Textures.cpp walks it from TextureCache::top via `lower`.
+struct CachedTexture
+{
+    GLuint  glName;         // GL texture name handed to glBindTexture
+    u32     address;        // source image address recorded when the entry was created
+    u32     crc;            // checksum the cache lookups compare against
+    float   offsetS;
+    float   offsetT;
+    u32     maskS;
+    u32     maskT;
+    u32     clampS;
+    u32     clampT;
+    u32     mirrorS;
+    u32     mirrorT;
+    u32     line;
+    u32     size;
+    u32     format;
+    u32     tMem;
+    u32     palette;
+    u32     width;          // N64 width
+    u32     height;         // N64 height
+    u32     clampWidth;     // Width to clamp to
+    u32     clampHeight;    // Height to clamp to
+    u32     realWidth;      // Actual texture width
+    u32     realHeight;     // Actual texture height
+    f32     scaleS;         // Scale to map to 0.0-1.0
+    f32     scaleT;
+    f32     shiftScaleS;    // Scale to shift
+    f32     shiftScaleT;
+    u32     textureBytes;   // added to TextureCache::cachedBytes when the entry is loaded
+
+    CachedTexture   *lower;
+    CachedTexture   *higher;
+    u32     lastDList;      // value of RSP.DList when the entry was created
+
+};
+
+#define TEXTURECACHE_MAX (8 * 1024 * 1024)
+#define TEXTUREBUFFER_SIZE (512 * 1024)
+
+// Global state of the texture cache (a single instance, `cache`, is declared below).
+struct TextureCache
+{
+    CachedTexture   *current[2];        // entry bound to each texture unit, indexed by t
+    CachedTexture   *bottom;
+    CachedTexture   *top;               // lookups start here and follow ->lower
+    CachedTexture   *dummy;             // presumably what TextureCache_ActivateDummy binds - confirm
+
+    u32             cachedBytes;        // running sum of textureBytes of loaded entries
+    u32             numCached;
+    u32             hits;
+    u32             misses;
+    GLuint          glNoiseNames[32];   // indexed with RSP.DList & 0x1F
+
+    HashMap<CachedTexture>  hash;       // queried by crc when __HASHMAP_OPT is defined
+
+};
+
+extern TextureCache cache;
+
+// Returns the smallest power of two that is >= dim (1 for dim of 0 or 1).
+// For dim above 0x80000000 no such u32 exists; the result saturates at 0x80000000.
+inline u32 pow2( u32 dim )
+{
+    u32 i = 1;
+
+    // Stop at the top bit: one more shift would wrap i to 0 and the loop would
+    // never terminate.
+    while (i < dim && i < 0x80000000u) i <<= 1;
+
+    return i;
+}
+
+// Returns the smallest exponent i such that (1 << i) >= dim, i.e. the number of
+// doublings needed to reach dim starting from 1 (0 for dim of 0 or 1).
+// For dim above 0x80000000 the result is 32.
+inline u32 powof( u32 dim )
+{
+    u32 num = 1;
+    u32 i = 0;
+
+    // The i < 32 bound ends the loop once num has been shifted out of the 32-bit
+    // range; without it num wraps to 0 and the loop never terminates.
+    while (num < dim && i < 32)
+    {
+        num <<= 1;
+        i++;
+    }
+
+    return i;
+}
+
+CachedTexture *TextureCache_AddTop();
+void TextureCache_MoveToTop( CachedTexture *newtop );
+void TextureCache_Remove( CachedTexture *texture );
+void TextureCache_RemoveBottom();
+void TextureCache_Init();
+void TextureCache_Destroy();
+void TextureCache_Update( u32 t );
+void TextureCache_ActivateTexture( u32 t, CachedTexture *texture );
+void TextureCache_ActivateNoise( u32 t );
+void TextureCache_ActivateDummy( u32 t );
+bool TextureCache_Verify();
+
+#endif
+
diff --git a/source/gles2n64/src/Types.h b/source/gles2n64/src/Types.h
new file mode 100644 (file)
index 0000000..64753c0
--- /dev/null
@@ -0,0 +1,42 @@
+#ifndef TYPES_H
+#define TYPES_H
+
+#include <stdint.h>
+
+/* Short fixed-width aliases built on <stdint.h>. */
+typedef uint8_t   u8;  /* unsigned  8-bit */
+typedef uint16_t  u16; /* unsigned 16-bit */
+typedef uint32_t  u32; /* unsigned 32-bit */
+typedef uint64_t  u64; /* unsigned 64-bit */
+
+typedef int8_t   s8;  /* signed  8-bit */
+typedef int16_t  s16; /* signed 16-bit */
+typedef int32_t  s32; /* signed 32-bit */
+typedef int64_t  s64; /* signed 64-bit */
+
+/* Volatile-qualified variants of the aliases above. */
+typedef volatile uint8_t   vu8;    /* volatile unsigned  8-bit */
+typedef volatile uint16_t  vu16;   /* volatile unsigned 16-bit */
+typedef volatile uint32_t  vu32;   /* volatile unsigned 32-bit */
+typedef volatile uint64_t  vu64;   /* volatile unsigned 64-bit */
+
+typedef volatile int8_t    vs8;    /* volatile signed  8-bit */
+typedef volatile int16_t   vs16;   /* volatile signed 16-bit */
+typedef volatile int32_t   vs32;   /* volatile signed 32-bit */
+typedef volatile int64_t   vs64;   /* volatile signed 64-bit */
+
+typedef float              f32;    /* single prec floating point */
+typedef double             f64;    /* double prec floating point */
+
+/* Fallbacks: each macro is defined only if nothing included earlier provided it. */
+#ifndef TRUE
+#define TRUE    1
+#endif
+
+#ifndef FALSE
+#define FALSE   0
+#endif
+
+#ifndef NULL
+#define NULL    0
+#endif
+
+#endif // TYPES_H
+
diff --git a/source/gles2n64/src/VI.cpp b/source/gles2n64/src/VI.cpp
new file mode 100644 (file)
index 0000000..37e280f
--- /dev/null
@@ -0,0 +1,101 @@
+#include "Common.h"
+#include "gles2N64.h"
+#include "Types.h"
+#include "VI.h"
+#include "OpenGL.h"
+#include "N64.h"
+#include "gSP.h"
+#include "gDP.h"
+#include "RSP.h"
+#include "Debug.h"
+#include "Config.h"
+
+VIInfo VI;
+
+// Refreshes VI.width / VI.height and their reciprocals, then, when
+// config.ignoreOffscreenRendering is set, records the buffer VI_ORIGIN points at
+// in VI.display[] unless it overlaps a range that is already recorded.
+void VI_UpdateSize()
+{
+    if (config.video.force)
+    {
+        // Forced mode: take the size from the configuration as-is.
+        VI.width = config.video.width;
+        VI.height = config.video.height;
+    }
+    else
+    {
+        // The two offsets are decoded but not used further down.
+        f32 scaleX = _FIXED2FLOAT( _SHIFTR( *REG.VI_X_SCALE, 0, 12 ), 10 );
+        f32 offsetX = _FIXED2FLOAT( _SHIFTR( *REG.VI_X_SCALE, 16, 12 ), 10 );
+
+        f32 scaleY = _FIXED2FLOAT( _SHIFTR( *REG.VI_Y_SCALE, 0, 12 ), 10 );
+        f32 offsetY = _FIXED2FLOAT( _SHIFTR( *REG.VI_Y_SCALE, 16, 12 ), 10 );
+
+        u32 lastColumn = _SHIFTR( *REG.VI_H_START, 0, 10 );
+        u32 firstColumn = _SHIFTR( *REG.VI_H_START, 16, 10 );
+
+        // These are in half-lines, so shift an extra bit
+        u32 lastLine = _SHIFTR( *REG.VI_V_START, 1, 9 );
+        u32 firstLine = _SHIFTR( *REG.VI_V_START, 17, 9 );
+
+        //Glide does this:
+        if (lastColumn == firstColumn)
+            lastColumn = (u32)(*REG.VI_WIDTH / scaleX);
+
+        VI.width = (lastColumn - firstColumn) * scaleX;
+        VI.height = (lastLine - firstLine) * scaleY * 1.0126582f;
+    }
+
+    // A zero dimension falls back to 320x240 so the reciprocals stay finite.
+    if (VI.width == 0)
+        VI.width = 320;
+    if (VI.height == 0)
+        VI.height = 240;
+
+    VI.rwidth = 1.0f / VI.width;
+    VI.rheight = 1.0f / VI.height;
+
+    if (!config.ignoreOffscreenRendering)
+        return;
+
+    //add display buffer if doesn't exist
+    u32 start = RSP_SegmentToPhysical(*REG.VI_ORIGIN) & 0x00FFFFFF;
+    u32 end = min(start + VI.width * VI.height * 4, RDRAMSize);
+
+    int slot = 0;
+    while (slot < VI.displayNum)
+    {
+        // Stop at the first recorded buffer whose range touches [start, end].
+        if ((VI.display[slot].start <= end && VI.display[slot].start >= start) ||
+            (start <= VI.display[slot].end && start >= VI.display[slot].start))
+            break;
+        slot++;
+    }
+
+    if (slot != VI.displayNum)
+        return;
+
+    // No overlap found: store the range. Once all 16 slots are in use the
+    // index wraps to slot 0.
+    VI.display[slot%16].start = start;
+    VI.display[slot%16].end = end;
+
+    if (VI.displayNum < 16)
+        VI.displayNum = VI.displayNum+1;
+    else
+        VI.displayNum = 16;
+}
+
+// Swaps the GL buffers according to config.updateMode:
+//  - SCREEN_UPDATE_AT_VI_CHANGE: when VI_ORIGIN changed and the new value is below
+//    the previous one or more than 0x2000 above it; the new origin is remembered
+//    either way.
+//  - SCREEN_UPDATE_AT_VI_UPDATE: when gSP.changed has CHANGED_COLORBUFFER set; the
+//    flag is cleared afterwards.
+// Any other mode does nothing here.
+void VI_UpdateScreen()
+{
+    if (config.updateMode == SCREEN_UPDATE_AT_VI_CHANGE)
+    {
+        if (*REG.VI_ORIGIN == VI.lastOrigin)
+            return;
+
+        if (*REG.VI_ORIGIN < VI.lastOrigin || *REG.VI_ORIGIN > VI.lastOrigin+0x2000  )
+            OGL_SwapBuffers();
+
+        VI.lastOrigin = *REG.VI_ORIGIN;
+    }
+    else if (config.updateMode == SCREEN_UPDATE_AT_VI_UPDATE)
+    {
+        if (!(gSP.changed & CHANGED_COLORBUFFER))
+            return;
+
+        OGL_SwapBuffers();
+        gSP.changed &= ~CHANGED_COLORBUFFER;
+    }
+}
+
diff --git a/source/gles2n64/src/VI.h b/source/gles2n64/src/VI.h
new file mode 100644 (file)
index 0000000..c138744
--- /dev/null
@@ -0,0 +1,27 @@
+#ifndef VI_H
+#define VI_H
+#include "Types.h"
+
+// Video-interface state shared through the global `VI`.
+struct VIInfo
+{
+    u32 width;          // written by VI_UpdateSize()
+    u32 height;
+    f32 rwidth;         // 1.0f / width
+    f32 rheight;        // 1.0f / height
+    u32 lastOrigin;     // VI_ORIGIN value last seen by VI_UpdateScreen()
+
+    u32 realWidth;
+    u32 realHeight;
+
+    // Display buffer address ranges recorded by VI_UpdateSize().
+    struct{
+        u32 start;
+        u32 end;
+    } display[16];
+
+    u32 displayNum;     // number of display[] entries in use, at most 16
+
+};
+
+extern VIInfo VI;
+
+void VI_UpdateSize();
+void VI_UpdateScreen();
+
+#endif
+
diff --git a/source/gles2n64/src/convert.h b/source/gles2n64/src/convert.h
new file mode 100644 (file)
index 0000000..dbed5e7
--- /dev/null
@@ -0,0 +1,338 @@
+#ifndef CONVERT_H
+#define CONVERT_H
+
+#include "Types.h"
+
+// Expands a 5-bit value (the index) to 8 bits; each entry is index * 255 / 31
+// rounded to the nearest integer.
+// NOTE(review): `volatile` on a constant table forces every lookup to be a real
+// memory read; presumably a compiler workaround - confirm before removing.
+const volatile unsigned char Five2Eight[32] =
+{
+      0, // 00000 = 00000000
+      8, // 00001 = 00001000
+     16, // 00010 = 00010000
+     25, // 00011 = 00011001
+     33, // 00100 = 00100001
+     41, // 00101 = 00101001
+     49, // 00110 = 00110001
+     58, // 00111 = 00111010
+     66, // 01000 = 01000010
+     74, // 01001 = 01001010
+     82, // 01010 = 01010010
+     90, // 01011 = 01011010
+     99, // 01100 = 01100011
+    107, // 01101 = 01101011
+    115, // 01110 = 01110011
+    123, // 01111 = 01111011
+    132, // 10000 = 10000100
+    140, // 10001 = 10001100
+    148, // 10010 = 10010100
+    156, // 10011 = 10011100
+    165, // 10100 = 10100101
+    173, // 10101 = 10101101
+    181, // 10110 = 10110101
+    189, // 10111 = 10111101
+    197, // 11000 = 11000101
+    206, // 11001 = 11001110
+    214, // 11010 = 11010110
+    222, // 11011 = 11011110
+    230, // 11100 = 11100110
+    239, // 11101 = 11101111
+    247, // 11110 = 11110111
+    255  // 11111 = 11111111
+};
+
+// Expands a 4-bit value (the index) to 8 bits by repeating the nibble (index * 17).
+// Valid indices are 0..15 only.
+const volatile unsigned char Four2Eight[16] =
+{
+      0, // 0000 = 00000000
+     17, // 0001 = 00010001
+     34, // 0010 = 00100010
+     51, // 0011 = 00110011
+     68, // 0100 = 01000100
+     85, // 0101 = 01010101
+    102, // 0110 = 01100110
+    119, // 0111 = 01110111
+    136, // 1000 = 10001000
+    153, // 1001 = 10011001
+    170, // 1010 = 10101010
+    187, // 1011 = 10111011
+    204, // 1100 = 11001100
+    221, // 1101 = 11011101
+    238, // 1110 = 11101110
+    255  // 1111 = 11111111
+};
+
+// Expands a 3-bit value (the index) to 4 bits; each entry is index * 15 / 7
+// rounded to the nearest integer.
+const volatile unsigned char Three2Four[8] =
+{
+     0, // 000 = 0000
+     2, // 001 = 0010
+     4, // 010 = 0100
+     6, // 011 = 0110
+     9, // 100 = 1001
+    11, // 101 = 1011
+    13, // 110 = 1101
+    15, // 111 = 1111
+};
+
+// Expands a 3-bit value (the index) to 8 bits; each entry is index * 255 / 7
+// rounded to the nearest integer.
+const volatile unsigned char Three2Eight[8] =
+{
+      0, // 000 = 00000000
+     36, // 001 = 00100100
+     73, // 010 = 01001001
+    109, // 011 = 01101101
+    146, // 100 = 10010010
+    182, // 101 = 10110110
+    219, // 110 = 11011011
+    255, // 111 = 11111111
+};
+// Expands a 2-bit value (the index) to 8 bits (index * 85).
+const volatile unsigned char Two2Eight[4] =
+{
+      0, // 00 = 00000000
+     85, // 01 = 01010101
+    170, // 10 = 10101010
+    255  // 11 = 11111111
+};
+
+// Expands a single bit (the index) to 4 bits: 0 stays 0, 1 becomes 15.
+const volatile unsigned char One2Four[2] =
+{
+     0, // 0 = 0000
+    15, // 1 = 1111
+};
+
+// Expands a single bit (the index) to 8 bits: 0 stays 0, 1 becomes 255.
+const volatile unsigned char One2Eight[2] =
+{
+      0, // 0 = 00000000
+    255, // 1 = 11111111
+};
+
+// Copies numBytes from src to dest while undoing a 32-bit byte swap of the source:
+// whole 32-bit words are byte-reversed, and the stray bytes before and after them
+// are fetched from the mirrored position inside their word (address ^ 3).
+// NOTE(review): presumably src points into RDRAM kept as byte-swapped words -
+// confirm against the callers.
+//
+// Addresses are manipulated as uintptr_t (from <stdint.h>, included by Types.h)
+// because a pointer does not fit in `long` on every platform.
+static inline void UnswapCopy( void *src, void *dest, u32 numBytes )
+{
+    // copy leading bytes
+    int leadingBytes = ((uintptr_t)src) & 3;
+    if (leadingBytes != 0)
+    {
+        // Bytes left in the partially covered first word, limited to numBytes.
+        leadingBytes = 4-leadingBytes;
+        if ((unsigned int)leadingBytes > numBytes)
+            leadingBytes = numBytes;
+        numBytes -= leadingBytes;
+
+        // Start at the mirrored byte and walk downwards through the word.
+        src = (void *)((uintptr_t)src ^ 3);
+        for (int i = 0; i < leadingBytes; i++)
+        {
+            *(u8 *)(dest) = *(u8 *)(src);
+            dest = (void *)((uintptr_t)dest+1);
+            src  = (void *)((uintptr_t)src -1);
+        }
+        // When the whole remainder of the word was copied, src now sits one byte
+        // below that word; +5 moves it to the start of the next word.
+        src = (void *)((uintptr_t)src+5);
+    }
+
+    // copy dwords
+    int numDWords = numBytes >> 2;
+    while (numDWords--)
+    {
+        u32 dword = *(u32 *)src;
+#ifdef ARM_ASM
+        asm("rev %0, %0" : "+r"(dword)::);
+#else
+        dword = ((dword<<24)|((dword<<8)&0x00FF0000)|((dword>>8)&0x0000FF00)|(dword>>24));
+#endif
+        *(u32 *)dest = dword;
+        dest = (void *)((uintptr_t)dest+4);
+        src  = (void *)((uintptr_t)src +4);
+    }
+
+    // copy trailing bytes
+    int trailingBytes = numBytes & 3;
+    if (trailingBytes)
+    {
+        // Same mirrored, descending walk as for the leading bytes.
+        src = (void *)((uintptr_t)src ^ 3);
+        for (int i = 0; i < trailingBytes; i++)
+        {
+            *(u8 *)(dest) = *(u8 *)(src);
+            dest = (void *)((uintptr_t)dest+1);
+            src  = (void *)((uintptr_t)src -1);
+        }
+    }
+}
+
+// Swaps the two 32-bit words of each consecutive 8-byte group in mem.
+// numDWords is the number of 8-byte groups processed (one swap per count).
+// The words are addressed through a u32 pointer rather than by casting the
+// address to `long`, which cannot hold a pointer on every platform.
+static inline void DWordInterleave( void *mem, u32 numDWords )
+{
+    u32 *pair = (u32 *)mem;
+
+    for (; numDWords != 0; numDWords--, pair += 2)
+    {
+        u32 first = pair[0];
+        pair[0] = pair[1];
+        pair[1] = first;
+    }
+}
+
+// Swaps the two 8-byte halves of each consecutive 16-byte group in mem.
+// numDWords is halved first, so numDWords / 2 groups are processed.
+// The words are addressed through a u32 pointer rather than by casting the
+// address to `long`, which cannot hold a pointer on every platform.
+inline void QWordInterleave( void *mem, u32 numDWords )
+{
+    u32 *quad = (u32 *)mem;
+
+    for (u32 groups = numDWords >> 1; groups != 0; groups--, quad += 4)
+    {
+        u32 word0 = quad[0];
+        u32 word1 = quad[1];
+        quad[0] = quad[2];
+        quad[2] = word0;
+        quad[1] = quad[3];
+        quad[3] = word1;
+    }
+}
+
+
+// Reverses the byte order of a 32-bit value (single `rev` instruction with ARM_ASM).
+inline u32 swapdword( u32 value )
+{
+#ifdef ARM_ASM
+    asm("rev %0, %0" : "+r"(value)::);
+    return value;
+#else
+    // Shift first, then mask: byte 3 -> 0, byte 2 -> 1, byte 1 -> 2, byte 0 -> 3.
+    return (value >> 24) |
+           ((value >>  8) & 0x0000ff00) |
+           ((value <<  8) & 0x00ff0000) |
+           (value << 24);
+#endif
+}
+
+// Exchanges the two bytes of a 16-bit value (`rev16` with ARM_ASM).
+inline u16 swapword( u16 value )
+{
+#ifdef ARM_ASM
+    asm("rev16 %0, %0" : "+r"(value)::);
+    return value;
+#else
+    const u16 oldHigh = value >> 8;
+    const u16 oldLow  = value << 8;     // the conversion to u16 drops the bits above bit 15
+    return oldLow | oldHigh;
+#endif
+}
+
+// Packs the high nibble of each byte of a 32-bit colour into 16 bits:
+// byte 0 -> bits 12-15, byte 1 -> bits 8-11, byte 2 -> bits 4-7, byte 3 -> bits 0-3.
+inline u16 RGBA8888_RGBA4444( u32 color )
+{
+    const u32 r = (color <<  8) & 0xf000;
+    const u32 g = (color >>  4) & 0x0f00;
+    const u32 b = (color >> 16) & 0x00f0;
+    const u32 a = (color >> 28);
+    return r | g | b | a;
+}
+
+// Converts a 16-bit 5-5-5-1 colour to 32 bits. The input is byte-swapped first;
+// after the swap R is bits 15-11, G bits 10-6, B bits 5-1 and A bit 0. The result
+// has R in the low byte and A in the high byte.
+inline u32 RGBA5551_RGBA8888( u16 color )
+{
+    const u16 native = swapword( color );
+
+    const u32 r = Five2Eight[native >> 11];
+    const u32 g = Five2Eight[(native >> 6) & 0x001f];
+    const u32 b = Five2Eight[(native >> 1) & 0x001f];
+    const u32 a = One2Eight [native & 0x0001];
+
+    return r | (g << 8) | (b << 16) | (a << 24);
+}
+
+// No colour conversion is needed between identical formats: only the two bytes
+// of the 16-bit value are exchanged.
+inline u16 RGBA5551_RGBA5551( u16 color )
+{
+    const u16 swapped = swapword( color );
+    return swapped;
+}
+
+// Expands a 16-bit value to 32 bits: the low input byte (intensity) fills the
+// three low bytes of the result and the high input byte (alpha) the top byte.
+inline u32 IA88_RGBA8888( u16 color )
+{
+    const u32 alpha = (u8)(color >> 8);
+    const u32 intensity = (u8)(color & 0x00FF);
+    return intensity | (intensity << 8) | (intensity << 16) | (alpha << 24);
+}
+
+// Reduces a 16-bit value to 4-4-4-4: the top nibble of the high input byte is
+// used as intensity (bits 15-4 of the result) and the top nibble of the low input
+// byte as alpha (bits 3-0).
+// NOTE(review): IA88_RGBA8888 in this file reads intensity from the LOW byte and
+// alpha from the HIGH byte, the opposite of what is done here - confirm which
+// byte order the callers actually supply.
+inline u16 IA88_RGBA4444( u16 color )
+{
+    const u8 intensity = color >> 12;
+    const u8 alpha = (color >> 4) & 0x000F;
+    return (u16)(intensity * 0x1110 + alpha);
+}
+
+// Expands an 8-bit value (intensity in the high nibble, alpha in the low nibble)
+// to 4-4-4-4: the intensity nibble is repeated in bits 15-4 and alpha stays in
+// bits 3-0.
+inline u16 IA44_RGBA4444( u8 color )
+{
+    const u16 intensityBits = color & 0xf0;   // still positioned at bits 4-7
+    return (intensityBits << 8) | (intensityBits << 4) | color;
+}
+
+// Expands an 8-bit value (intensity in the high nibble, alpha in the low nibble)
+// to 32 bits: intensity fills the three low bytes, alpha the top byte.
+inline u32 IA44_RGBA8888( u8 color )
+{
+    const u32 intensity = Four2Eight[color >> 4];
+    const u32 alpha = Four2Eight[color & 0x0F];
+    return intensity | (intensity << 8) | (intensity << 16) | (alpha << 24);
+}
+
+// Expands an 8-bit value (intensity in the high nibble, alpha in the low nibble)
+// to 16 bits with intensity in the low byte and alpha in the high byte.
+inline u16 IA44_IA88( u8 color )
+{
+    const u16 intensity = Four2Eight[color >> 4];
+    const u16 alpha = Four2Eight[color & 0x0F];
+    return intensity | (alpha << 8);
+}
+
+// Expands a value holding 3 bits of intensity (color >> 1) and 1 bit of alpha
+// (bit 0) to 4-4-4-4: intensity in bits 15-4, alpha in bits 3-0.
+// The input is not masked; callers are expected to pass a value in 0..15.
+inline u16 IA31_RGBA4444( u8 color )
+{
+    const u8 intensity = Three2Four[color >> 1];
+    const u8 alpha = One2Four[color & 0x01];
+    return (u16)(intensity * 0x1110 + alpha);
+}
+
+// Expands a value holding 3 bits of intensity (color >> 1) and 1 bit of alpha
+// (bit 0) to 16 bits with intensity in the low byte and alpha in the high byte.
+// The input is not masked; callers are expected to pass a value in 0..15.
+inline u16 IA31_IA88( u8 color )
+{
+    const u16 intensity = Three2Eight[color >> 1];
+    const u16 alpha = One2Eight[color & 0x01];
+    return intensity | (alpha << 8);
+}
+
+// Expands a value holding 3 bits of intensity (color >> 1) and 1 bit of alpha
+// (bit 0) to 32 bits. The layout matches the other *_RGBA8888 converters in this
+// file (IA88_RGBA8888, IA44_RGBA8888): intensity in the three low bytes, alpha in
+// the top byte. The 4-4-4-4 ordering (alpha in the lowest bits) must not be used
+// here, as it would put alpha into the first colour channel.
+// The input is not masked; callers are expected to pass a value in 0..15.
+inline u32 IA31_RGBA8888( u8 color )
+{
+    u8 i = Three2Eight[color >> 1];
+    u8 a = One2Eight[color & 0x01];
+    return (a << 24) | (i << 16) | (i << 8) | i;
+}
+
+// Reduces an 8-bit intensity to 4 bits (its high nibble) and repeats that nibble
+// in all four fields of a 4-4-4-4 value.
+inline u16 I8_RGBA4444( u8 color )
+{
+    const u16 nibble = color >> 4;
+    return (u16)(nibble * 0x1111);
+}
+
+// Repeats an 8-bit intensity in all four bytes of a 32-bit value.
+inline u32 I8_RGBA8888( u8 color )
+{
+    const u32 intensity = color;
+    return intensity * 0x01010101u;
+}
+
+// Repeats the low nibble of color in all four fields of a 4-4-4-4 value.
+inline u16 I4_RGBA4444( u8 color )
+{
+    const u16 nibble = color & 0x0f;
+    return (u16)(nibble * 0x1111);
+}
+
+// Expands the low nibble of color to an 8-bit intensity via Four2Eight.
+inline u8 I4_I8( u8 color )
+{
+    const u8 nibble = color & 0x0f;
+    return Four2Eight[nibble];
+}
+
+// Expands the low nibble of color to 8 bits and stores that value in both bytes
+// of the 16-bit result.
+inline u16 I4_IA88( u8 color )
+{
+    const u32 expanded = Four2Eight[color & 0x0f];
+    return (u16)(expanded * 0x0101);
+}
+
+// Stores an 8-bit intensity in both bytes of a 16-bit value.
+inline u16 I8_IA88( u8 color )
+{
+    const u16 intensity = color;
+    return (u16)(intensity * 0x0101);
+}
+
+
+// Splits the value into its high and low byte and recombines them in the same
+// positions, so the result equals the input.
+inline u16 IA88_IA88( u16 color )
+{
+    const u16 highByte = color & 0xFF00;
+    const u16 lowByte = color & 0x00FF;
+    return highByte | lowByte;
+}
+
+
+// Expands a 4-bit intensity to 32 bits with the expanded value in all four bytes.
+// Only the low nibble of color is used, as in I4_RGBA4444 / I4_I8 / I4_IA88:
+// Four2Eight has 16 entries, so an unmasked index above 15 would read past it.
+// Results for inputs 0..15 are unchanged.
+inline u32 I4_RGBA8888( u8 color )
+{
+    u32 c = Four2Eight[color & 0x0f];
+    return (c << 24) | (c << 16) | (c << 8) | c;
+}
+
+#endif // CONVERT_H
+
diff --git a/source/gles2n64/src/eglport.cpp b/source/gles2n64/src/eglport.cpp
new file mode 100755 (executable)
index 0000000..b6c5e2a
--- /dev/null
@@ -0,0 +1,706 @@
+/**
+ *
+ *  EGLPORT.C
+ *  Copyright (C) 2011-2013 Scott R. Smith
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a copy
+ *  of this software and associated documentation files (the "Software"), to deal
+ *  in the Software without restriction, including without limitation the rights
+ *  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *  copies of the Software, and to permit persons to whom the Software is
+ *  furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in
+ *  all copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *  THE SOFTWARE.
+ *
+ */
+
+#include "eglport.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define USE_EGL_SDL 1
+#define USE_GLES2      1
+
+#if defined(USE_EGL_SDL)
+#include "SDL.h"
+#include "SDL_syswm.h"
+SDL_SysWMinfo sysWmInfo;      /** Holds our X Display/Window information */
+#endif /* USE_EGL_SDL */
+
+#if defined(PANDORA) /* Pandora VSync Support */
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <linux/fb.h>
+
+#ifndef FBIO_WAITFORVSYNC
+#define FBIO_WAITFORVSYNC _IOW('F', 0x20, __u32)
+#endif
+int fbdev = -1;
+
+#elif defined(RPI)
+#include "bcm_host.h"
+#endif /* PANDORA */
+
+/** Render modes; eglSettings[CFG_MODE] is compared against these values. */
+enum EGL_RENDER_T {
+    RENDER_RAW=0,           /** Sets render mode to raw or framebuffer mode. */
+    RENDER_SDL,             /** Sets render mode to X11/SDL mode. */
+    RENDER_TOTAL            /** Number of render modes. */
+};
+
+/** Indices into the eglSettings[] array. */
+enum EGL_SETTINGS_T {
+    CFG_MODE=0,             /** Render mode for EGL 0=RAW 1=SDL. */
+    CFG_VSYNC,              /** Controls system vsync if available. */
+    CFG_FSAA,               /** Number of samples for full screen AA. 0 is off, 2/4 samples. */
+    CFG_FPS,                /** Calculate and report frames per second. */
+    CFG_RED_SIZE,           /** Number of bits of Red in the color buffer. */
+    CFG_GREEN_SIZE,         /** Number of bits of Green in the color buffer. */
+    CFG_BLUE_SIZE,          /** Number of bits of Blue in the color buffer. */
+    CFG_ALPHA_SIZE,         /** Number of bits of Alpha in the color buffer. */
+    CFG_DEPTH_SIZE,         /** Number of bits of Z in the depth buffer. */
+    CFG_BUFFER_SIZE,        /** The total color component bits in the color buffer. */
+    CFG_STENCIL_SIZE,       /** Number of bits of Stencil in the stencil buffer. */
+    CFG_TOTAL               /** Total number of settings. */
+};
+
+NativeDisplayType   nativeDisplay = 0;      /** Reference to the system's native display */
+NativeWindowType    nativeWindow  = 0;      /** Reference to the system's native window */
+EGLint              eglSettings[CFG_TOTAL]; /** Stores setting values, indexed by EGL_SETTINGS_T. */
+EGLDisplay          eglDisplay    = NULL;   /** Reference to the EGL display */
+EGLConfig           eglConfig     = NULL;   /** Reference to the EGL config */
+EGLContext          eglContext    = NULL;   /** Reference to the EGL context */
+EGLSurface          eglSurface    = NULL;   /** Reference to the EGL surface */
+
+#define     totalConfigsIn 5                /** Total number of configurations to request */
+EGLint      totalConfigsFound = 0;          /** Total number of configurations matching attributes */
+EGLConfig   eglConfigs[totalConfigsIn];     /** Structure containing references to matching configurations */
+
+uint32_t    fpsCount    = 0;                /** Total number of frames counted */
+uint32_t    fpsTime     = 0;                /** Start time of frame count measurement */
+
+/** Buffer depths published through eglport.h; filled in by EGL_Open() and reset to 0 by EGL_Close(). */
+int8_t eglColorbits    = 0;
+int8_t eglDepthbits    = 0;
+int8_t eglStencilbits  = 0;
+
+
+/** Private API */
+/* Configuration and EGL helpers; the int8_t functions return 0 on success, 1 on failure. */
+void        OpenCfg                 ( const char* file );
+int8_t      ConfigureEGL            ( EGLConfig config );
+int8_t      FindEGLConfigs          ( void );
+int8_t      CheckEGLErrors          ( const char* file, uint16_t line );
+
+/* Native display / window acquisition and release. */
+int8_t      GetNativeDisplay        ( void );
+int8_t      GetNativeWindow         ( uint16_t width, uint16_t height );
+void        FreeNativeDisplay       ( void );
+void        FreeNativeWindow        ( void );
+
+/* Platform specific hooks (vsync device, host init, tick counter). */
+void        Platform_Open           ( void );
+void        Platform_Close          ( void );
+void        Platform_VSync          ( void );
+uint32_t    Platform_GetTicks       ( void );
+
+/** @brief Release all EGL and system resources
+ */
+void EGL_Close( void )
+{
+    /* Release EGL resources */
+    if (eglDisplay != NULL)
+    {
+        peglMakeCurrent( eglDisplay, NULL, NULL, EGL_NO_CONTEXT );
+        if (eglContext != NULL) {
+            peglDestroyContext( eglDisplay, eglContext );
+        }
+        if (eglSurface != NULL) {
+            peglDestroySurface( eglDisplay, eglSurface );
+        }
+        peglTerminate( eglDisplay );
+    }
+
+    eglSurface = NULL;
+    eglContext = NULL;
+    eglDisplay = NULL;
+       
+       eglColorbits = 0;
+       eglDepthbits = 0;
+       eglStencilbits = 0;
+
+    /* Release platform resources */
+    FreeNativeWindow();
+    FreeNativeDisplay();
+    Platform_Close();
+
+    CheckEGLErrors( __FILE__, __LINE__ );
+
+    printf( "EGLport: Closed\n" );
+}
+
+/** @brief Swap the surface buffer onto the display
+ */
+void EGL_SwapBuffers( void )
+{
+    if (eglSettings[CFG_VSYNC] != 0) {
+        Platform_VSync();
+    }
+
+    peglSwapBuffers( eglDisplay, eglSurface );
+
+    if (eglSettings[CFG_FPS] != 0) {
+        fpsCount++;
+
+        if (fpsTime - Platform_GetTicks() >= 1000)
+        {
+            printf( "EGLport: %d fps\n", fpsCount );
+            fpsTime = Platform_GetTicks();
+            fpsCount = 0;
+        }
+    }
+}
+
+/** @brief Obtain the system display and initialize EGL
+ * @param width : desired pixel width of the window (not used by all platforms)
+ * @param height : desired pixel height of the window (not used by all platforms)
+ * @return : 0 if the function passed, else 1
+ */
+int8_t EGL_Open( uint16_t width, uint16_t height )
+{
+    EGLint eglMajorVer, eglMinorVer;
+    EGLBoolean result;
+    uint32_t configIndex = 0;
+    const char* output;
+
+    static const EGLint contextAttribs[] =
+    {
+#if defined(USE_GLES2)
+          EGL_CONTEXT_CLIENT_VERSION,     2,
+#endif
+          EGL_NONE
+    };
+
+#if defined(DEBUG)
+    printf( "EGLport Warning: DEBUG is enabled which may effect performance\n" );
+#endif
+
+    /* Check that system is not open */
+    if (eglDisplay != NULL || eglContext != NULL || eglSurface != NULL)
+    {
+        printf( "EGLport ERROR: EGL system is already open!\n" );
+        return 1;
+    }
+
+    /* Check for the cfg file to alternative settings */
+    OpenCfg( "eglport.cfg" );
+
+    /* Setup any platform specific bits */
+    Platform_Open();
+
+    printf( "EGLport: Opening EGL display\n" );
+    if (GetNativeDisplay() != 0)
+    {
+        printf( "EGLport ERROR: Unable to obtain native display!\n" );
+        return 1;
+    }
+
+    eglDisplay = peglGetDisplay( nativeDisplay );
+    if (eglDisplay == EGL_NO_DISPLAY)
+    {
+        CheckEGLErrors( __FILE__, __LINE__ );
+        printf( "EGLport ERROR: Unable to create EGL display.\n" );
+        return 1;
+    }
+
+    printf( "EGLport: Initializing\n" );
+    result = peglInitialize( eglDisplay, &eglMajorVer, &eglMinorVer );
+    if (result != EGL_TRUE )
+    {
+        CheckEGLErrors( __FILE__, __LINE__ );
+        printf( "EGLport ERROR: Unable to initialize EGL display.\n" );
+        return 1;
+    }
+
+    /* Get EGL Library Information */
+    printf( "EGL Implementation Version: Major %d Minor %d\n", eglMajorVer, eglMinorVer );
+    output = peglQueryString( eglDisplay, EGL_VENDOR );
+    printf( "EGL_VENDOR: %s\n", output );
+    output = peglQueryString( eglDisplay, EGL_VERSION );
+    printf( "EGL_VERSION: %s\n", output );
+    output = peglQueryString( eglDisplay, EGL_EXTENSIONS );
+    printf( "EGL_EXTENSIONS: %s\n", output );
+
+    if (FindEGLConfigs() != 0)
+    {
+        printf( "EGLport ERROR: Unable to configure EGL. See previous error.\n" );
+        return 1;
+    }
+
+    printf( "EGLport: Using Config %d\n", configIndex );
+#if defined(EGL_VERSION_1_2)
+    /* Bind GLES and create the context */
+    printf( "EGLport: Binding API\n" );
+    result = peglBindAPI( EGL_OPENGL_ES_API );
+    if ( result == EGL_FALSE )
+    {
+        CheckEGLErrors( __FILE__, __LINE__ );
+        printf( "EGLport ERROR: Could not bind EGL API.\n" );
+        return 1;
+    }
+#endif /* EGL_VERSION_1_2 */
+
+    printf( "EGLport: Creating Context\n" );
+    eglContext = peglCreateContext( eglDisplay, eglConfigs[configIndex], NULL, contextAttribs );
+    if (eglContext == EGL_NO_CONTEXT)
+    {
+        CheckEGLErrors( __FILE__, __LINE__ );
+        printf( "EGLport ERROR: Unable to create GLES context!\n");
+        return 1;
+    }
+
+    printf( "EGLport: Creating window surface\n" );
+    if (GetNativeWindow( width, height ) != 0)
+    {
+        printf( "EGLport ERROR: Unable to obtain native window!\n" );
+        return 1;
+    }
+
+    eglSurface = peglCreateWindowSurface( eglDisplay, eglConfigs[configIndex], nativeWindow, 0 );
+    if (eglSurface == EGL_NO_SURFACE)
+    {
+        CheckEGLErrors( __FILE__, __LINE__ );
+        printf( "EGLport ERROR: Unable to create EGL surface!\n" );
+        return 1;
+    }
+
+    printf( "EGLport: Making Current\n" );
+    result = peglMakeCurrent( eglDisplay,  eglSurface,  eglSurface, eglContext );
+    if (result != EGL_TRUE)
+    {
+        CheckEGLErrors( __FILE__, __LINE__ );
+        printf( "EGLport ERROR: Unable to make GLES context current\n" );
+        return 1;
+    }
+
+       {
+         EGLint color, depth, stencil;
+         eglGetConfigAttrib(eglDisplay, eglConfigs[configIndex], EGL_BUFFER_SIZE, &color);
+         eglGetConfigAttrib(eglDisplay, eglConfigs[configIndex], EGL_DEPTH_SIZE, &depth);
+         eglGetConfigAttrib(eglDisplay, eglConfigs[configIndex], EGL_STENCIL_SIZE, &stencil);
+         eglColorbits = (color==16)?5:8; //quick hack
+         eglDepthbits = depth;
+         eglStencilbits = stencil;
+       }
+
+    printf( "EGLport: Setting swap interval\n" );
+    peglSwapInterval( eglDisplay, (eglSettings[CFG_VSYNC] > 0) ? 1 : 0 );
+
+    printf( "EGLport: Complete\n" );
+
+    CheckEGLErrors( __FILE__, __LINE__ );
+       
+    return 0;
+}
+
+/** @brief Read settings that configure how to use EGL
+ * @param file : name of the config file
+ */
+void OpenCfg ( const char* file )
+{
+    #define MAX_STRING 20
+    #define MAX_SIZE 100
+    uint8_t i;
+    FILE* fp = NULL;
+    char* location = NULL;
+    char eglStrings[CFG_TOTAL][MAX_STRING];
+    char buffer[MAX_SIZE];
+
+    strncpy( eglStrings[CFG_MODE], "egl_mode=", MAX_STRING );
+    strncpy( eglStrings[CFG_VSYNC], "use_vsync=", MAX_STRING );
+    strncpy( eglStrings[CFG_FSAA], "use_fsaa=", MAX_STRING );
+    strncpy( eglStrings[CFG_RED_SIZE], "size_red=", MAX_STRING );
+    strncpy( eglStrings[CFG_GREEN_SIZE], "size_green=", MAX_STRING );
+    strncpy( eglStrings[CFG_BLUE_SIZE], "size_blue=", MAX_STRING );
+    strncpy( eglStrings[CFG_ALPHA_SIZE], "size_alpha=", MAX_STRING );
+    strncpy( eglStrings[CFG_DEPTH_SIZE], "size_depth=", MAX_STRING );
+    strncpy( eglStrings[CFG_BUFFER_SIZE], "size_buffer=", MAX_STRING );
+    strncpy( eglStrings[CFG_STENCIL_SIZE], "size_stencil=", MAX_STRING );
+
+    /* Set defaults */
+#if defined(USE_EGL_SDL)
+    eglSettings[CFG_MODE]           = RENDER_SDL;
+#else
+    eglSettings[CFG_MODE]           = RENDER_RAW;
+#endif
+    eglSettings[CFG_VSYNC]          = 0;
+    eglSettings[CFG_FSAA]           = 0;
+    eglSettings[CFG_FPS]            = 0;
+    eglSettings[CFG_RED_SIZE]       = 5;
+    eglSettings[CFG_GREEN_SIZE]     = 6;
+    eglSettings[CFG_BLUE_SIZE]      = 5;
+    eglSettings[CFG_ALPHA_SIZE]     = 0;
+    eglSettings[CFG_DEPTH_SIZE]     = 16;
+    eglSettings[CFG_BUFFER_SIZE]    = 16;
+    eglSettings[CFG_STENCIL_SIZE]   = 0;
+
+    /* Parse INI file */
+    fp = fopen( file, "r");
+    if (fp != NULL)
+    {
+        while (fgets( buffer, MAX_SIZE, fp ) != NULL)
+        {
+            for (i=0; i<CFG_TOTAL; i++)
+            {
+                location = strstr( buffer, eglStrings[i] );
+                if (location != NULL)
+                {
+                    eglSettings[i] = atol( location+strlen( eglStrings[i] ) );
+                    printf( "EGLport: %s set to %d.\n", eglStrings[i], eglSettings[i] );
+                    break;
+                }
+            }
+        }
+
+        fclose( fp );
+    }
+    else
+    {
+        printf( "EGL ERROR: Unable to read ini settings from file '%s'. Using defaults\n", file );
+    }
+}
+
+/** @brief Find a EGL configuration tht matches the defined attributes
+ * @return : 0 if the function passed, else 1
+ */
+int8_t FindEGLConfigs( void )
+{
+    EGLBoolean result;
+    int attrib = 0;
+    EGLint ConfigAttribs[23];
+
+    ConfigAttribs[attrib++] = EGL_RED_SIZE;                         /* 1 */
+    ConfigAttribs[attrib++] = eglSettings[CFG_RED_SIZE];            /* 2 */
+    ConfigAttribs[attrib++] = EGL_GREEN_SIZE;                       /* 3 */
+    ConfigAttribs[attrib++] = eglSettings[CFG_GREEN_SIZE];          /* 4 */
+    ConfigAttribs[attrib++] = EGL_BLUE_SIZE;                        /* 5 */
+    ConfigAttribs[attrib++] = eglSettings[CFG_BLUE_SIZE];           /* 6 */
+    ConfigAttribs[attrib++] = EGL_ALPHA_SIZE;                       /* 7 */
+    ConfigAttribs[attrib++] = eglSettings[CFG_ALPHA_SIZE];          /* 8 */
+    ConfigAttribs[attrib++] = EGL_DEPTH_SIZE;                       /* 9 */
+    ConfigAttribs[attrib++] = eglSettings[CFG_DEPTH_SIZE];          /* 10 */
+    ConfigAttribs[attrib++] = EGL_BUFFER_SIZE;                      /* 11 */
+    ConfigAttribs[attrib++] = eglSettings[CFG_BUFFER_SIZE];         /* 12 */
+    ConfigAttribs[attrib++] = EGL_STENCIL_SIZE;                     /* 13 */
+    ConfigAttribs[attrib++] = eglSettings[CFG_STENCIL_SIZE];        /* 14 */
+    ConfigAttribs[attrib++] = EGL_SURFACE_TYPE;                     /* 15 */
+    ConfigAttribs[attrib++] = EGL_WINDOW_BIT;                       /* 16 */
+#if defined(EGL_VERSION_1_2)
+    ConfigAttribs[attrib++] = EGL_RENDERABLE_TYPE;                  /* 17 */
+#if defined(USE_GLES1)
+    ConfigAttribs[attrib++] = EGL_OPENGL_ES_BIT;
+#elif defined(USE_GLES2)
+    ConfigAttribs[attrib++] = EGL_OPENGL_ES2_BIT;                   /* 18 */
+#endif /* USE_GLES1 */
+#endif /* EGL_VERSION_1_2 */
+    ConfigAttribs[attrib++] = EGL_SAMPLE_BUFFERS;                   /* 19 */
+    ConfigAttribs[attrib++] = (eglSettings[CFG_FSAA] > 0) ? 1 : 0;  /* 20 */
+    ConfigAttribs[attrib++] = EGL_SAMPLES;                          /* 21 */
+    ConfigAttribs[attrib++] = eglSettings[CFG_FSAA];                /* 22 */
+    ConfigAttribs[attrib++] = EGL_NONE;                             /* 23 */
+
+    result = peglChooseConfig( eglDisplay, ConfigAttribs, eglConfigs, totalConfigsIn, &totalConfigsFound );
+    if (result != EGL_TRUE || totalConfigsFound == 0)
+    {
+        CheckEGLErrors( __FILE__, __LINE__ );
+        printf( "EGLport ERROR: Unable to query for available configs, found %d.\n", totalConfigsFound );
+        return 1;
+    }
+    printf( "EGLport: Found %d available configs\n", totalConfigsFound );
+
+    return 0;
+}
+
+/** @brief Error checking function
+ * @param file : string reference that contains the source file that the check is occuring in
+ * @param line : numeric reference that contains the line number that the check is occuring in
+ * @return : 0 if the function passed, else 1
+ */
+int8_t CheckEGLErrors( const char* file, uint16_t line )
+{
+    EGLenum error;
+    const char* errortext;
+    const char* description;
+
+    error = eglGetError();
+
+    if (error != EGL_SUCCESS && error != 0)
+    {
+        switch (error)
+        {
+            case EGL_NOT_INITIALIZED:
+                errortext   = "EGL_NOT_INITIALIZED.";
+                description = "EGL is not or could not be initialized, for the specified display.";
+                break;
+            case EGL_BAD_ACCESS:
+                errortext   = "EGL_BAD_ACCESS EGL";
+                description = "cannot access a requested resource (for example, a context is bound in another thread).";
+                break;
+            case EGL_BAD_ALLOC:
+                errortext   = "EGL_BAD_ALLOC EGL";
+                description = "failed to allocate resources for the requested operation.";
+                break;
+            case EGL_BAD_ATTRIBUTE:
+                errortext   = "EGL_BAD_ATTRIBUTE";
+                description = "An unrecognized attribute or attribute value was passed in anattribute list.";
+                break;
+            case EGL_BAD_CONFIG:
+                errortext   = "EGL_BAD_CONFIG";
+                description = "An EGLConfig argument does not name a valid EGLConfig.";
+                break;
+            case EGL_BAD_CONTEXT:
+                errortext   = "EGL_BAD_CONTEXT";
+                description = "An EGLContext argument does not name a valid EGLContext.";
+                break;
+            case EGL_BAD_CURRENT_SURFACE:
+                errortext   = "EGL_BAD_CURRENT_SURFACE";
+                description = "The current surface of the calling thread is a window, pbuffer,or pixmap that is no longer valid.";
+                break;
+            case EGL_BAD_DISPLAY:
+                errortext   = "EGL_BAD_DISPLAY";
+                description = "An EGLDisplay argument does not name a valid EGLDisplay.";
+                break;
+            case EGL_BAD_MATCH:
+                errortext   = "EGL_BAD_MATCH";
+                description = "Arguments are inconsistent; for example, an otherwise valid context requires buffers (e.g. depth or stencil) not allocated by an otherwise valid surface.";
+                break;
+            case EGL_BAD_NATIVE_PIXMAP:
+                errortext   = "EGL_BAD_NATIVE_PIXMAP";
+                description = "An EGLNativePixmapType argument does not refer to a validnative pixmap.";
+                break;
+            case EGL_BAD_NATIVE_WINDOW:
+                errortext   = "EGL_BAD_NATIVE_WINDOW";
+                description = "An EGLNativeWindowType argument does not refer to a validnative window.";
+                break;
+            case EGL_BAD_PARAMETER:
+                errortext   = "EGL_BAD_PARAMETER";
+                description = "One or more argument values are invalid.";
+                break;
+            case EGL_BAD_SURFACE:
+                errortext   = "EGL_BAD_SURFACE";
+                description = "An EGLSurface argument does not name a valid surface (window,pbuffer, or pixmap) configured for rendering";
+                break;
+            case EGL_CONTEXT_LOST:
+                errortext   = "EGL_CONTEXT_LOST";
+                description = "A power management event has occurred. The application mustdestroy all contexts and reinitialise client API state and objects to continue rendering.";
+                break;
+            default:
+                errortext   = "Unknown EGL Error";
+                description = "";
+                break;
+        }
+
+        printf( "EGLport ERROR: EGL Error detected in file %s at line %d: %s (0x%X)\n  Description: %s\n", file, line, errortext, error, description );
+        return 1;
+    }
+
+    return 0;
+}
+
+/** @brief Obtain a reference to the system's native display
+ * @param window : pointer to save the display reference
+ * @return : 0 if the function passed, else 1
+ */
+int8_t GetNativeDisplay( void )
+{
+    if (eglSettings[CFG_MODE] == RENDER_RAW)        /* RAW FB mode */
+    {
+        printf( "EGLport: Using EGL_DEFAULT_DISPLAY\n" );
+        nativeDisplay = EGL_DEFAULT_DISPLAY;
+    }
+    else if (eglSettings[CFG_MODE] == RENDER_SDL)   /* SDL/X11 mode */
+    {
+#if defined(USE_EGL_SDL)
+        printf( "EGLport: Opening SDL/X11 display\n" );
+        SDL_VERSION(&sysWmInfo.version);
+        SDL_GetWMInfo(&sysWmInfo);
+        nativeDisplay = (EGLNativeDisplayType)sysWmInfo.info.x11.display;
+
+        if (nativeDisplay == 0)
+        {
+            printf( "EGLport ERROR: unable to get display!\n" );
+            return 1;
+        }
+#else
+        printf( "EGLport ERROR: SDL mode was not enabled in this compile!\n" );
+#endif
+    }
+
+    return 0;
+}
+
+/** @brief Obtain a reference to the system's native window
+ * @param width : desired pixel width of the window (not used by all platforms)
+ * @param height : desired pixel height of the window (not used by all platforms)
+ * @return : 0 if the function passed, else 1
+ */
+int8_t GetNativeWindow( uint16_t width, uint16_t height )
+{
+    nativeWindow = 0;
+
+#if defined(WIZ) || defined(CAANOO)
+
+    nativeWindow = (NativeWindowType)malloc(16*1024);
+
+    if(nativeWindow == NULL) {
+        printf( "EGLport ERROR: Memory for window Failed\n" );
+        return 1;
+    }
+
+#elif defined(RPI)
+
+    EGLBoolean result;
+    uint32_t screen_width, screen_height;
+    static EGL_DISPMANX_WINDOW_T nativewindow;
+    DISPMANX_ELEMENT_HANDLE_T dispman_element;
+    DISPMANX_DISPLAY_HANDLE_T dispman_display;
+    DISPMANX_UPDATE_HANDLE_T dispman_update;
+    VC_RECT_T dst_rect;
+    VC_RECT_T src_rect;
+
+    /* create an EGL window surface */
+    result = graphics_get_display_size(0 /* LCD */, &screen_width, &screen_height);
+    if(result < 0) {
+        printf( "EGLport ERROR: RPi graphicget_display_size failed\n" );
+        return 1;
+    }
+
+    dst_rect.x = 0;
+    dst_rect.y = 0;
+    dst_rect.width = screen_width;
+    dst_rect.height = screen_height;
+
+    src_rect.x = 0;
+    src_rect.y = 0;
+    src_rect.width = width << 16;
+    src_rect.height = height << 16;
+
+    dispman_display = vc_dispmanx_display_open( 0 /* LCD */);
+    dispman_update  = vc_dispmanx_update_start( 0 );
+    dispman_element = vc_dispmanx_element_add ( dispman_update, dispman_display,
+      0 /*layer*/, &dst_rect, 0 /*src*/,
+      &src_rect, DISPMANX_PROTECTION_NONE,  (VC_DISPMANX_ALPHA_T*)0 /*alpha*/,  (DISPMANX_CLAMP_T*)0 /*clamp*/,  (DISPMANX_TRANSFORM_T)0 /*transform*/);
+
+    nativewindow.element = dispman_element;
+    nativewindow.width = screen_width;
+    nativewindow.height = screen_height;
+    vc_dispmanx_update_submit_sync( dispman_update );
+
+    nativeWindow = (NativeWindowType)&nativewindow;
+
+#else /* default */
+
+    if (eglSettings[CFG_MODE] == RENDER_RAW)        /* RAW FB mode */
+    {
+        nativeWindow = 0;
+    }
+    else if(eglSettings[CFG_MODE] == RENDER_SDL)    /* SDL/X11 mode */
+    {
+#if defined(USE_EGL_SDL)
+        /* SDL_GetWMInfo is populated when display was opened */
+        nativeWindow = (NativeWindowType)sysWmInfo.info.x11.window;
+
+        if (nativeWindow == 0)
+        {
+            printf( "EGLport ERROR: unable to get window!\n" );
+            return 1;
+        }
+#else
+        printf( "EGLport ERROR: SDL mode was not enabled in this compile!\n" );
+#endif
+    }
+    else
+    {
+        printf( "EGLport ERROR: Unknown EGL render mode %d!\n", eglSettings[CFG_MODE] );
+        return 1;
+    }
+
+#endif /* WIZ / CAANOO */
+
+    return 0;
+}
+
+/** @brief Release the system's native display
+ *
+ * Currently a no-op: the body is empty on every platform. Called from
+ * EGL_Close() after FreeNativeWindow().
+ */
+void FreeNativeDisplay( void )
+{
+}
+
+/** @brief Release the system's native window
+ *
+ * Only WIZ/CAANOO builds have anything to release: the block allocated for
+ * the window is freed and the reference cleared. Other builds do nothing.
+ */
+void FreeNativeWindow( void )
+{
+#if defined(WIZ) || defined(CAANOO)
+    /* free() ignores a null pointer, so no explicit check is needed */
+    free( nativeWindow );
+    nativeWindow = NULL;
+#endif /* WIZ / CAANOO */
+}
+
+/** @brief Open any system specific resources
+ *
+ * PANDORA: opens /dev/fb0 read-only into fbdev for the vsync ioctl and
+ * reports a failure without aborting. RPI: calls bcm_host_init().
+ */
+void Platform_Open( void )
+{
+#if defined(PANDORA)
+    /* Pandora VSync */
+    const int fd = open( "/dev/fb0", O_RDONLY /* O_RDWR */ );
+
+    fbdev = fd;
+    if ( fd < 0 )
+    {
+        printf( "EGLport ERROR: Couldn't open /dev/fb0 for Pandora Vsync\n" );
+    }
+#elif defined(RPI)
+    bcm_host_init();
+#endif /* PANDORA */
+}
+
+/** @brief Release any system specific resources
+ *
+ * PANDORA: closes the framebuffer descriptor used for vsync, if it was
+ * opened, and marks it as closed. Other platforms have nothing to release.
+ */
+void Platform_Close( void )
+{
+#if defined(PANDORA)
+    /* Pandora VSync */
+    /* fbdev stays -1 when Platform_Open() failed or was never called, so
+       only close a descriptor that is actually open. */
+    if (fbdev >= 0) {
+        close( fbdev );
+    }
+    fbdev = -1;
+#endif /* PANDORA */
+}
+
+/** @brief Wait for the system's vsync, where supported
+ *
+ * PANDORA: issues FBIO_WAITFORVSYNC on the framebuffer descriptor when it
+ * is open. Other platforms do nothing.
+ */
+void Platform_VSync( void )
+{
+#if defined(PANDORA)
+    /* Pandora VSync */
+    int arg = 0;
+
+    if (fbdev < 0) {
+        return;     /* framebuffer was not opened */
+    }
+
+    ioctl( fbdev, FBIO_WAITFORVSYNC, &arg );
+#endif /* PANDORA */
+}
+
+/** @brief Get the system tick time (ms)
+ * @return : SDL_GetTicks() when built with USE_EGL_SDL; otherwise 0 after
+ *           printing an error
+ */
+uint32_t Platform_GetTicks( void )
+{
+#if defined(USE_EGL_SDL)
+    return SDL_GetTicks();
+#else
+    printf( "EGLport ERROR: SDL mode was not enabled in this compile!\n" );
+    return 0;
+#endif
+}
diff --git a/source/gles2n64/src/eglport.h b/source/gles2n64/src/eglport.h
new file mode 100755 (executable)
index 0000000..736456c
--- /dev/null
@@ -0,0 +1,108 @@
+/**
+ *
+ *  EGLPORT.H
+ *  Copyright (C) 2011-2013 Scott R. Smith
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a copy
+ *  of this software and associated documentation files (the "Software"), to deal
+ *  in the Software without restriction, including without limitation the rights
+ *  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *  copies of the Software, and to permit persons to whom the Software is
+ *  furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in
+ *  all copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ *  THE SOFTWARE.
+ *
+ */
+
+#ifndef EGLPORT_H
+#define EGLPORT_H
+
+#include <stdint.h>
+#include "EGL/egl.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Defines (in every case choose only one) */
+/**     Common: */
+/**         DEBUG : enable additional error monitoring per EGL function call */
+/**     Native display and window system for use with EGL */
+/**         USE_EGL_SDL : used for access to a SDL X11 window */
+/**     Platform: settings that are specific to that device */
+/**         PANDORA (USE_GLES1 or USE_GLES2) */
+/**         WIZ     (USE_GLES1) */
+/**         CAANOO  (USE_GLES1) */
+/**         RPI     (USE_GLES1 or USE_GLES2) */
+/**     GLES Version */
+/**         USE_GLES1 : EGL for use with OpenGL-ES 1.X contexts */
+/**         USE_GLES2 : EGL for use with OpenGL-ES 2.0 contexts */
+
+/** Public API */
+/** EGL_Close : release the EGL context, surface and display plus platform resources */
+/** EGL_Open : read eglport.cfg and create the EGL display, context and surface; returns 0 on success, 1 on failure */
+/** EGL_SwapBuffers : present the current frame (optionally waiting for vsync and reporting fps) */
+void    EGL_Close                   ( void );
+int8_t  EGL_Open                    ( uint16_t width, uint16_t height );
+void    EGL_SwapBuffers             ( void );
+
+/** Buffer depths of the active config; set by EGL_Open, reset to 0 by EGL_Close */
+extern int8_t  eglColorbits;
+extern int8_t  eglDepthbits;
+extern int8_t  eglStencilbits;
+
+/** Simple Examples  */
+/**     Raw mode:
+            EGL_Open( window_width, window_height );
+            do while(!quit) {
+                ... run app
+                EGL_SwapBuffers();
+            }
+            EGL_Close();
+*/
+/**     X11/SDL mode:
+            SDL_Init( SDL_INIT_VIDEO );
+            SDL_Surface* screen = SDL_SetVideoMode(640, 480, 16, SDL_SWSURFACE|SDL_FULLSCREEN);
+            EGL_Open( window_width, window_height );
+            do while(!quit) {
+                ... run app
+                EGL_SwapBuffers();
+            }
+            EGL_Close();
+            SDL_Quit();
+*/
+
+/** EGL call wrappers.
+    GET_EGLERROR expands to the call followed by a ';'. With DEBUG defined
+    it is additionally followed by a block that calls CheckEGLErrors(), so
+    a pegl* call expands to more than one statement and must not be used as
+    the unbraced body of an if/else/loop.
+    NOTE(review): CheckEGLErrors is not declared in this header, so DEBUG
+    builds need a declaration in scope wherever these macros are used. */
+#if defined(DEBUG)
+#define GET_EGLERROR(FUNCTION)               \
+    FUNCTION;                                \
+    {                                        \
+        CheckEGLErrors(__FILE__, __LINE__);  \
+    }
+#else
+#define GET_EGLERROR(FUNCTION) FUNCTION;
+#endif
+
+#define peglQueryString(A,B)                    GET_EGLERROR(eglQueryString(A,B))
+#define peglDestroyContext(A,B)                 GET_EGLERROR(eglDestroyContext(A,B))
+#define peglDestroySurface(A,B)                 GET_EGLERROR(eglDestroySurface(A,B))
+#define peglTerminate(A)                        GET_EGLERROR(eglTerminate(A))
+#define peglSwapBuffers(A,B)                    GET_EGLERROR(eglSwapBuffers(A,B))
+#define peglGetDisplay(A)                       GET_EGLERROR(eglGetDisplay(A))
+#define peglBindAPI(A)                          GET_EGLERROR(eglBindAPI(A))
+#define peglCreateContext(A,B,C,D)              GET_EGLERROR(eglCreateContext(A,B,C,D))
+#define peglCreateWindowSurface(A,B,C,D)        GET_EGLERROR(eglCreateWindowSurface(A,B,C,D))
+#define peglInitialize(A,B,C)                   GET_EGLERROR(eglInitialize(A,B,C))
+#define peglMakeCurrent(A,B,C,D)                GET_EGLERROR(eglMakeCurrent(A,B,C,D))
+#define peglChooseConfig(A,B,C,D,E)             GET_EGLERROR(eglChooseConfig(A,B,C,D,E))
+#define peglSwapInterval(A,B)                   GET_EGLERROR(eglSwapInterval(A,B))
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* EGLPORT_H */
diff --git a/source/gles2n64/src/gDP.cpp b/source/gles2n64/src/gDP.cpp
new file mode 100644 (file)
index 0000000..afe65ff
--- /dev/null
@@ -0,0 +1,970 @@
+#include <stdlib.h>
+
+#include "gles2N64.h"
+#include "N64.h"
+#include "GBI.h"
+#include "RSP.h"
+#include "gDP.h"
+#include "gSP.h"
+#include "Types.h"
+#include "Debug.h"
+#include "convert.h"
+#include "OpenGL.h"
+#include "CRC.h"
+#include "DepthBuffer.h"
+#include "VI.h"
+#include "Config.h"
+
+
+//thank rice_video for this:
+bool _IsRenderTexture()
+{
+    bool foundSetScissor=false;
+    bool foundFillRect=false;
+    bool foundSetFillColor=false;
+    bool foundSetCImg=false;
+    bool foundTxtRect=false;
+    int height;
+    unsigned int newFillColor = 0;
+    unsigned int dwPC = RSP.PC[RSP.PCi];       // This points to the next instruction
+
+    for(int i=0; i<10; i++ )
+    {
+        unsigned int w0 = *(unsigned int *)(RDRAM + dwPC + i*8);
+        unsigned int w1 = *(unsigned int *)(RDRAM + dwPC + 4 + i*8);
+
+        if ((w0>>24) == G_SETSCISSOR)
+        {
+            height = ((w1>>0 )&0xFFF)/4;
+            foundSetScissor = true;
+            continue;
+        }
+
+        if ((w0>>24) == G_SETFILLCOLOR)
+        {
+            height = ((w1>>0 )&0xFFF)/4;
+            foundSetFillColor = true;
+            newFillColor = w1;
+            continue;
+        }
+
+        if ((w0>>24) == G_FILLRECT)
+        {
+            unsigned int x0 = ((w1>>12)&0xFFF)/4;
+            unsigned int y0 = ((w1>>0 )&0xFFF)/4;
+            unsigned int x1 = ((w0>>12)&0xFFF)/4;
+            unsigned int y1 = ((w0>>0 )&0xFFF)/4;
+
+            if (x0 == 0 && y0 == 0)
+            {
+                if( x1 == gDP.colorImage.width)
+                {
+                    height = y1;
+                    foundFillRect = true;
+                    continue;
+                }
+
+                if(x1 == (unsigned int)(gDP.colorImage.width-1))
+                {
+                    height = y1+1;
+                    foundFillRect = true;
+                    continue;
+                }
+            }
+        }
+
+        if ((w0>>24) == G_TEXRECT)
+        {
+            foundTxtRect = true;
+            break;
+        }
+
+        if ((w0>>24) == G_SETCIMG)
+        {
+            foundSetCImg = true;
+            break;
+        }
+    }
+
+    if (foundFillRect )
+    {
+        if (foundSetFillColor)
+        {
+            if (newFillColor != 0xFFFCFFFC)
+                return true;    // this is a render_texture
+            else
+                return false;
+        }
+
+        if (gDP.fillColor.i == 0x00FFFFF7)
+            return true;    // this is a render_texture
+        else
+            return false;   // this is a normal ZImg
+    }
+    else if (foundSetFillColor && newFillColor == 0xFFFCFFFC && foundSetCImg )
+    {
+        return false;
+    }
+    else
+        return true;
+
+
+    if (!foundSetCImg) return true;
+
+    if (foundSetScissor ) return true;
+
+    return false;
+}
+
+gDPInfo gDP;
+
+void gDPSetOtherMode( u32 mode0, u32 mode1 )
+{
+    gDP.otherMode.h = mode0;
+    gDP.otherMode.l = mode1;
+    gDP.changed |= CHANGED_RENDERMODE | CHANGED_CYCLETYPE | CHANGED_ALPHACOMPARE;
+
+#ifdef DEBUG
+    DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gDPSetOtherMode( %s | %s | %s | %s | %s | %s | %s | %s | %s | %s | %s, %s | %s | %s%s%s%s%s | %s | %s%s%s );\n",
+        AlphaDitherText[gDP.otherMode.alphaDither],
+        ColorDitherText[gDP.otherMode.colorDither],
+        CombineKeyText[gDP.otherMode.combineKey],
+        TextureConvertText[gDP.otherMode.textureConvert],
+        TextureFilterText[gDP.otherMode.textureFilter],
+        TextureLUTText[gDP.otherMode.textureLUT],
+        TextureLODText[gDP.otherMode.textureLOD],
+        TextureDetailText[gDP.otherMode.textureDetail],
+        TexturePerspText[gDP.otherMode.texturePersp],
+        CycleTypeText[gDP.otherMode.cycleType],
+        PipelineModeText[gDP.otherMode.pipelineMode],
+        AlphaCompareText[gDP.otherMode.alphaCompare],
+        DepthSourceText[gDP.otherMode.depthSource],
+        gDP.otherMode.AAEnable ? "AA_EN | " : "",
+        gDP.otherMode.depthCompare ? "Z_CMP | " : "",
+        gDP.otherMode.depthUpdate ? "Z_UPD | " : "",
+        gDP.otherMode.imageRead ? "IM_RD | " : "",
+        CvgDestText[gDP.otherMode.cvgDest],
+        DepthModeText[gDP.otherMode.depthMode],
+        gDP.otherMode.cvgXAlpha ? "CVG_X_ALPHA | " : "",
+        gDP.otherMode.alphaCvgSel ? "ALPHA_CVG_SEL | " : "",
+        gDP.otherMode.forceBlender ? "FORCE_BL" : "" );
+#endif
+}
+
+// Stores the primitive depth. Only the low 15 bits of z are used; the value is
+// mapped through the current viewport (subtract vtrans[2], divide by vscale[2]).
+// dz is stored as given. Flags CHANGED_PRIMITIVEZ.
+void gDPSetPrimDepth( u16 z, u16 dz )
+{
+    const u16 depth = z & 0x7FFF;
+
+    gDP.primDepth.z = (depth - gSP.viewport.vtrans[2]) / gSP.viewport.vscale[2];
+    gDP.primDepth.deltaZ = dz;
+    gDP.changed |= CHANGED_PRIMITIVEZ;
+
+#ifdef DEBUG
+    DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gDPSetPrimDepth( %f, %f );\n", gDP.primDepth.z, gDP.primDepth.deltaZ);
+#endif
+}
+
+// Stores mode in gDP.otherMode.pipelineMode; DEBUG builds log the stored value.
+void gDPPipelineMode( u32 mode )
+{
+    gDP.otherMode.pipelineMode = mode;
+
+#ifdef DEBUG
+    DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gDPPipelineMode( %s );\n", PipelineModeText[gDP.otherMode.pipelineMode] );
+#endif
+}
+
+// Stores type in gDP.otherMode.cycleType and flags CHANGED_CYCLETYPE.
+void gDPSetCycleType( u32 type )
+{
+    gDP.otherMode.cycleType = type;
+    gDP.changed |= CHANGED_CYCLETYPE;
+
+#ifdef DEBUG
+    DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gDPSetCycleType( %s );\n", CycleTypeText[gDP.otherMode.cycleType] );
+#endif
+}
+
+// Stores enable in gDP.otherMode.texturePersp; DEBUG builds log the stored value.
+void gDPSetTexturePersp( u32 enable )
+{
+    gDP.otherMode.texturePersp = enable;
+
+#ifdef DEBUG
+    DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_TEXTURE, "gDPSetTexturePersp( %s );\n", TexturePerspText[gDP.otherMode.texturePersp] );
+#endif
+}
+
+// Stores type in gDP.otherMode.textureDetail; DEBUG builds log the stored value.
+void gDPSetTextureDetail( u32 type )
+{
+    gDP.otherMode.textureDetail = type;
+
+#ifdef DEBUG
+    DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_TEXTURE, "gDPSetTextureDetail( %s );\n", TextureDetailText[gDP.otherMode.textureDetail] );
+#endif
+}
+
+// Stores mode in gDP.otherMode.textureLOD; DEBUG builds log the stored value.
+void gDPSetTextureLOD( u32 mode )
+{
+    gDP.otherMode.textureLOD = mode;
+
+#ifdef DEBUG
+    DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_TEXTURE, "gDPSetTextureLOD( %s );\n", TextureLODText[gDP.otherMode.textureLOD] );
+#endif
+}
+
+// Stores mode in gDP.otherMode.textureLUT; DEBUG builds log the stored value.
+void gDPSetTextureLUT( u32 mode )
+{
+    gDP.otherMode.textureLUT = mode;
+
+#ifdef DEBUG
+    DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_TEXTURE, "gDPSetTextureLUT( %s );\n", TextureLUTText[gDP.otherMode.textureLUT] );
+#endif
+}
+
+// Stores type in gDP.otherMode.textureFilter; DEBUG builds log the stored value.
+void gDPSetTextureFilter( u32 type )
+{
+    gDP.otherMode.textureFilter = type;
+
+#ifdef DEBUG
+    DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_TEXTURE, "gDPSetTextureFilter( %s );\n", TextureFilterText[gDP.otherMode.textureFilter] );
+#endif
+}
+
+// Stores type in gDP.otherMode.textureConvert; DEBUG builds log the stored value.
+void gDPSetTextureConvert( u32 type )
+{
+    gDP.otherMode.textureConvert = type;
+
+#ifdef DEBUG
+    DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_TEXTURE, "gDPSetTextureConvert( %s );\n", TextureConvertText[gDP.otherMode.textureConvert] );
+#endif
+}
+
+// Stores type in gDP.otherMode.combineKey; DEBUG builds log the stored value.
+void gDPSetCombineKey( u32 type )
+{
+    gDP.otherMode.combineKey = type;
+
+#ifdef DEBUG
+    DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_COMBINE, "gDPSetCombineKey( %s );\n", CombineKeyText[gDP.otherMode.combineKey] );
+#endif
+}
+
+// Stores type in gDP.otherMode.colorDither; DEBUG builds log the stored value.
+void gDPSetColorDither( u32 type )
+{
+    gDP.otherMode.colorDither = type;
+
+#ifdef DEBUG
+    DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gDPSetColorDither( %s );\n", ColorDitherText[gDP.otherMode.colorDither] );
+#endif
+}
+
+// Stores type in gDP.otherMode.alphaDither; DEBUG builds log the stored value.
+void gDPSetAlphaDither( u32 type )
+{
+    gDP.otherMode.alphaDither = type;
+
+#ifdef DEBUG
+    DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gDPSetAlphaDither( %s );\n", AlphaDitherText[gDP.otherMode.alphaDither] );
+#endif
+}
+
+// Stores mode in gDP.otherMode.alphaCompare and flags CHANGED_ALPHACOMPARE.
+void gDPSetAlphaCompare( u32 mode )
+{
+    gDP.otherMode.alphaCompare = mode;
+    gDP.changed |= CHANGED_ALPHACOMPARE;
+
+#ifdef DEBUG
+    DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gDPSetAlphaCompare( %s );\n", AlphaCompareText[gDP.otherMode.alphaCompare] );
+#endif
+}
+
+// Stores source in gDP.otherMode.depthSource and flags CHANGED_DEPTHSOURCE.
+void gDPSetDepthSource( u32 source )
+{
+    gDP.otherMode.depthSource = source;
+    gDP.changed |= CHANGED_DEPTHSOURCE;
+
+#ifdef DEBUG
+    DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gDPSetDepthSource( %s );\n", DepthSourceText[gDP.otherMode.depthSource] );
+#endif
+}
+
+void gDPSetRenderMode( u32 mode1, u32 mode2 )
+{
+    gDP.otherMode.l &= 0x00000007;
+    gDP.otherMode.l |= mode1 | mode2;
+    gDP.changed |= CHANGED_RENDERMODE;
+
+#ifdef DEBUG
+    // THIS IS INCOMPLETE!!!
+    DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gDPSetRenderMode( %s%s%s%s%s | %s | %s%s%s );\n",
+        gDP.otherMode.AAEnable ? "AA_EN | " : "",
+        gDP.otherMode.depthCompare ? "Z_CMP | " : "",
+        gDP.otherMode.depthUpdate ? "Z_UPD | " : "",
+        gDP.otherMode.imageRead ? "IM_RD | " : "",
+        CvgDestText[gDP.otherMode.cvgDest],
+        DepthModeText[gDP.otherMode.depthMode],
+        gDP.otherMode.cvgXAlpha ? "CVG_X_ALPHA | " : "",
+        gDP.otherMode.alphaCvgSel ? "ALPHA_CVG_SEL | " : "",
+        gDP.otherMode.forceBlender ? "FORCE_BL" : "" );
+#endif
+}
+
+void gDPSetCombine( s32 muxs0, s32 muxs1 )
+{
+    gDP.combine.muxs0 = muxs0;
+    gDP.combine.muxs1 = muxs1;
+    gDP.changed |= CHANGED_COMBINE;
+
+#ifdef DEBUG
+    DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_COMBINE, "gDPSetCombine( %s, %s, %s, %s, %s, %s, %s, %s,\n",
+        saRGBText[gDP.combine.saRGB0],
+        sbRGBText[gDP.combine.sbRGB0],
+        mRGBText[gDP.combine.mRGB0],
+        aRGBText[gDP.combine.aRGB0],
+        saAText[gDP.combine.saA0],
+        sbAText[gDP.combine.sbA0],
+        mAText[gDP.combine.mA0],
+        aAText[gDP.combine.aA0] );
+
+    DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_COMBINE, "               %s, %s, %s, %s, %s, %s, %s, %s );\n",
+        saRGBText[gDP.combine.saRGB1],
+        sbRGBText[gDP.combine.sbRGB1],
+        mRGBText[gDP.combine.mRGB1],
+        aRGBText[gDP.combine.aRGB1],
+        saAText[gDP.combine.saA1],
+        sbAText[gDP.combine.sbA1],
+        mAText[gDP.combine.mA1],
+        aAText[gDP.combine.aA1] );
+
+#endif
+}
+
+void gDPSetColorImage( u32 format, u32 size, u32 width, u32 address )
+{
+    if (config.updateMode == SCREEN_UPDATE_AT_CI_CHANGE)
+        OGL_SwapBuffers();
+
+    if (config.updateMode == SCREEN_UPDATE_AT_1ST_CI_CHANGE && OGL.screenUpdate)
+        OGL_SwapBuffers();
+
+    u32 addr = RSP_SegmentToPhysical( address );
+
+    if (gDP.colorImage.address != addr)
+    {
+        gDP.colorImage.changed = FALSE;
+        if (width == VI.width)
+            gDP.colorImage.height = VI.height;
+        else
+            gDP.colorImage.height = 1;
+    }
+
+    gDP.colorImage.format = format;
+    gDP.colorImage.size = size;
+    gDP.colorImage.width = width;
+    gDP.colorImage.address = addr;
+
+    if (config.ignoreOffscreenRendering)
+    {
+        int i;
+
+        //colorimage byte size:
+        //color image height is not the best thing to base this on, its normally set
+        //later on in the code
+
+        if (gDP.colorImage.address == gDP.depthImageAddress)
+        {
+            OGL.renderingToTexture = false;
+        }
+        else if (size == G_IM_SIZ_16b && format == G_IM_FMT_RGBA)
+        {
+            int s = 0;
+            switch(size)
+            {
+                case G_IM_SIZ_4b:   s = (gDP.colorImage.width * gDP.colorImage.height) / 2; break;
+                case G_IM_SIZ_8b:   s = (gDP.colorImage.width * gDP.colorImage.height); break;
+                case G_IM_SIZ_16b:  s = (gDP.colorImage.width * gDP.colorImage.height) * 2; break;
+                case G_IM_SIZ_32b:  s = (gDP.colorImage.width * gDP.colorImage.height) * 4; break;
+            }
+            u32 start = addr & 0x00FFFFFF;
+            u32 end = min(start + s, RDRAMSize);
+            for(i = 0; i < VI.displayNum; i++)
+            {
+                if (VI.display[i].start <= end && VI.display[i].start >= start) break;
+                if (start <= VI.display[i].end && start >= VI.display[i].start) break;
+            }
+
+            OGL.renderingToTexture = (i == VI.displayNum);
+        }
+        else
+        {
+            OGL.renderingToTexture = true;
+        }
+
+#if 0
+        if (OGL.renderingToTexture)
+        {
+            printf("start=%i end=%i\n", start, end);
+            printf("display=");
+            for(int i=0; i< VI.displayNum; i++) printf("%i,%i:", VI.display[i].start, VI.display[i].end);
+            printf("\n");
+        }
+#endif
+    }
+    else
+    {
+        OGL.renderingToTexture = false;
+    }
+
+
+#ifdef DEBUG
+    DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gDPSetColorImage( %s, %s, %i, 0x%08X );\n",
+        ImageFormatText[gDP.colorImage.format],
+        ImageSizeText[gDP.colorImage.size],
+        gDP.colorImage.width,
+        gDP.colorImage.address );
+#endif
+}
+
+void gDPSetTextureImage( u32 format, u32 size, u32 width, u32 address )
+{
+    gDP.textureImage.format = format;
+    gDP.textureImage.size = size;
+    gDP.textureImage.width = width;
+    gDP.textureImage.address = RSP_SegmentToPhysical( address );
+    gDP.textureImage.bpl = gDP.textureImage.width << gDP.textureImage.size >> 1;
+
+#ifdef DEBUG
+    DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_TEXTURE, "gDPSetTextureImage( %s, %s, %i, 0x%08X );\n",
+        ImageFormatText[gDP.textureImage.format],
+        ImageSizeText[gDP.textureImage.size],
+        gDP.textureImage.width,
+        gDP.textureImage.address );
+#endif
+}
+
+// Selects the depth buffer at the physical address of `address` via
+// DepthBuffer_SetBuffer(), clears the GL depth buffer if that buffer is marked
+// cleared, and records the address in gDP.depthImageAddress.
+//
+// depthBuffer.current is null-checked before use, matching gDPFillRectangle()
+// and gDPTextureRectangle(), which both guard the same pointer.
+// NOTE(review): whether DepthBuffer_SetBuffer() can leave current NULL is not
+// visible from here; the guard is defensive.
+void gDPSetDepthImage( u32 address )
+{
+    u32 addr = RSP_SegmentToPhysical(address);
+    DepthBuffer_SetBuffer(addr);
+
+    if (depthBuffer.current && depthBuffer.current->cleared)
+        OGL_ClearDepthBuffer();
+
+    gDP.depthImageAddress = addr;
+
+#ifdef DEBUG
+    DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gDPSetDepthImage( 0x%08X );\n", gDP.depthImageAddress );
+#endif
+}
+
+void gDPSetEnvColor( u32 r, u32 g, u32 b, u32 a )
+{
+    gDP.envColor.r = r * 0.0039215689f;
+    gDP.envColor.g = g * 0.0039215689f;
+    gDP.envColor.b = b * 0.0039215689f;
+    gDP.envColor.a = a * 0.0039215689f;
+
+    gDP.changed |= CHANGED_ENV_COLOR;
+
+#ifdef DEBUG
+    DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_COMBINE, "gDPSetEnvColor( %u, %u, %u, %u );\n",
+        r, g, b, a );
+#endif
+}
+
+void gDPSetBlendColor( u32 r, u32 g, u32 b, u32 a )
+{
+    gDP.blendColor.r = r * 0.0039215689f;
+    gDP.blendColor.g = g * 0.0039215689f;
+    gDP.blendColor.b = b * 0.0039215689f;
+    gDP.blendColor.a = a * 0.0039215689f;
+    gDP.changed |= CHANGED_BLENDCOLOR;
+
+#ifdef DEBUG
+    DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gDPSetBlendColor( %u, %u, %u, %u );\n",
+        r, g, b, a );
+#endif
+}
+
+// Stores the fog color, scaling each component by 1/255, and flags
+// CHANGED_FOGCOLOR.
+void gDPSetFogColor( u32 r, u32 g, u32 b, u32 a )
+{
+    const f32 inv255 = 0.0039215689f;
+
+    gDP.fogColor.r = r * inv255;
+    gDP.fogColor.g = g * inv255;
+    gDP.fogColor.b = b * inv255;
+    gDP.fogColor.a = a * inv255;
+
+    gDP.changed |= CHANGED_FOGCOLOR;
+
+#ifdef DEBUG
+    DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gDPSetFogColor( %u, %u, %u, %u );\n", r, g, b, a );
+#endif
+}
+
+void gDPSetFillColor( u32 c )
+{
+
+    gDP.fillColor.i = c;
+    gDP.fillColor.r = _SHIFTR( c, 11, 5 ) * 0.032258064f;
+    gDP.fillColor.g = _SHIFTR( c,  6, 5 ) * 0.032258064f;
+    gDP.fillColor.b = _SHIFTR( c,  1, 5 ) * 0.032258064f;
+    gDP.fillColor.a = _SHIFTR( c,  0, 1 );
+
+    gDP.fillColor.z = _SHIFTR( c,  2, 14 );
+    gDP.fillColor.dz = _SHIFTR( c, 0, 2 );
+
+#ifdef DEBUG
+    DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gDPSetFillColor( 0x%08X );\n", c );
+#endif
+}
+
+void gDPSetPrimColor( u32 m, u32 l, u32 r, u32 g, u32 b, u32 a )
+{
+    gDP.primColor.m = m;
+    gDP.primColor.l = l * 0.0039215689f;
+    gDP.primColor.r = r * 0.0039215689f;
+    gDP.primColor.g = g * 0.0039215689f;
+    gDP.primColor.b = b * 0.0039215689f;
+    gDP.primColor.a = a * 0.0039215689f;
+
+    gDP.changed |= CHANGED_PRIM_COLOR;
+
+#ifdef DEBUG
+    DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_COMBINE, "gDPSetPrimColor( %u, %u, %u, %u, %u, %u );\n",
+        m, l, r, g, b, a );
+#endif
+}
+
+void gDPSetTile( u32 format, u32 size, u32 line, u32 tmem, u32 tile, u32 palette, u32 cmt, u32 cms, u32 maskt, u32 masks, u32 shiftt, u32 shifts )
+{
+    if (((size == G_IM_SIZ_4b) || (size == G_IM_SIZ_8b)) && (format == G_IM_FMT_RGBA))
+        format = G_IM_FMT_CI;
+
+    gDP.tiles[tile].format = format;
+    gDP.tiles[tile].size = size;
+    gDP.tiles[tile].line = line;
+    gDP.tiles[tile].tmem = tmem;
+    gDP.tiles[tile].palette = palette;
+    gDP.tiles[tile].cmt = cmt;
+    gDP.tiles[tile].cms = cms;
+    gDP.tiles[tile].maskt = maskt;
+    gDP.tiles[tile].masks = masks;
+    gDP.tiles[tile].shiftt = shiftt;
+    gDP.tiles[tile].shifts = shifts;
+
+    if (!gDP.tiles[tile].masks) gDP.tiles[tile].clamps = 1;
+    if (!gDP.tiles[tile].maskt) gDP.tiles[tile].clampt = 1;
+}
+
+void gDPSetTileSize( u32 tile, u32 uls, u32 ult, u32 lrs, u32 lrt )
+{
+    gDP.tiles[tile].uls = _SHIFTR( uls, 2, 10 );
+    gDP.tiles[tile].ult = _SHIFTR( ult, 2, 10 );
+    gDP.tiles[tile].lrs = _SHIFTR( lrs, 2, 10 );
+    gDP.tiles[tile].lrt = _SHIFTR( lrt, 2, 10 );
+
+    gDP.tiles[tile].fuls = _FIXED2FLOAT( uls, 2 );
+    gDP.tiles[tile].fult = _FIXED2FLOAT( ult, 2 );
+    gDP.tiles[tile].flrs = _FIXED2FLOAT( lrs, 2 );
+    gDP.tiles[tile].flrt = _FIXED2FLOAT( lrt, 2 );
+
+    gDP.changed |= CHANGED_TILE;
+
+#ifdef DEBUG
+    DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_TEXTURE, "gDPSetTileSize( %u, %.2f, %.2f, %.2f, %.2f );\n",
+        tile,
+        gDP.tiles[tile].fuls,
+        gDP.tiles[tile].fult,
+        gDP.tiles[tile].flrs,
+        gDP.tiles[tile].flrt );
+#endif
+}
+
+void gDPLoadTile( u32 tile, u32 uls, u32 ult, u32 lrs, u32 lrt )
+{
+    void (*Interleave)( void *mem, u32 numDWords );
+
+    u32 address, height, bpl, line, y;
+    u64 *dest;
+    u8 *src;
+
+    gDPSetTileSize( tile, uls, ult, lrs, lrt );
+    gDP.loadTile = &gDP.tiles[tile];
+
+    if (gDP.loadTile->line == 0)
+        return;
+
+    address = gDP.textureImage.address + gDP.loadTile->ult * gDP.textureImage.bpl + (gDP.loadTile->uls << gDP.textureImage.size >> 1);
+    dest = &TMEM[gDP.loadTile->tmem];
+    bpl = (gDP.loadTile->lrs - gDP.loadTile->uls + 1) << gDP.loadTile->size >> 1;
+    height = gDP.loadTile->lrt - gDP.loadTile->ult + 1;
+    src = &RDRAM[address];
+
+    if (((address + height * bpl) > RDRAMSize) ||
+        (((gDP.loadTile->tmem << 3) + bpl * height) > 4096)) // Stay within TMEM
+    {
+#ifdef DEBUG
+        DebugMsg( DEBUG_HIGH | DEBUG_ERROR | DEBUG_TEXTURE, "// Attempting to load texture tile out of range\n" );
+        DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_TEXTURE, "gDPLoadTile( %u, %i, %i, %i, %i );\n",
+            tile, gDP.loadTile->uls, gDP.loadTile->ult, gDP.loadTile->lrs, gDP.loadTile->lrt );
+#endif
+        return;
+    }
+
+    // Line given for 32-bit is half what it seems it should since they split the
+    // high and low words. I'm cheating by putting them together.
+    if (gDP.loadTile->size == G_IM_SIZ_32b)
+    {
+        line = gDP.loadTile->line << 1;
+        Interleave = QWordInterleave;
+    }
+    else
+    {
+        line = gDP.loadTile->line;
+        Interleave = DWordInterleave;
+    }
+
+    for (y = 0; y < height; y++)
+    {
+        UnswapCopy( src, dest, bpl );
+        if (y & 1) Interleave( dest, line );
+
+        src += gDP.textureImage.bpl;
+        dest += line;
+    }
+
+    gDP.textureMode = TEXTUREMODE_NORMAL;
+    gDP.loadType = LOADTYPE_TILE;
+    gDP.changed |= CHANGED_TMEM;
+
+#ifdef DEBUG
+        DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_TEXTURE, "gDPLoadTile( %u, %i, %i, %i, %i );\n",
+            tile, gDP.loadTile->uls, gDP.loadTile->ult, gDP.loadTile->lrs, gDP.loadTile->lrt );
+#endif
+}
+
+void gDPLoadBlock( u32 tile, u32 uls, u32 ult, u32 lrs, u32 dxt )
+{
+    gDPSetTileSize( tile, uls, ult, lrs, dxt );
+    gDP.loadTile = &gDP.tiles[tile];
+
+    u32 bytes = (lrs + 1) << gDP.loadTile->size >> 1;
+    u32 address = gDP.textureImage.address + ult * gDP.textureImage.bpl + (uls << gDP.textureImage.size >> 1);
+
+    if ((bytes == 0) ||
+        ((address + bytes) > RDRAMSize) ||
+        (((gDP.loadTile->tmem << 3) + bytes) > 4096))
+    {
+#ifdef DEBUG
+        DebugMsg( DEBUG_HIGH | DEBUG_ERROR | DEBUG_TEXTURE, "// Attempting to load texture block out of range\n" );
+        DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_TEXTURE, "gDPLoadBlock( %u, %u, %u, %u, %u );\n",
+            tile, uls, ult, lrs, dxt );
+#endif
+//      bytes = min( bytes, min( RDRAMSize - gDP.textureImage.address, 4096 - (gDP.loadTile->tmem << 3) ) );
+        return;
+    }
+
+    u64* src = (u64*)&RDRAM[address];
+    u64* dest = &TMEM[gDP.loadTile->tmem];
+
+    if (dxt > 0)
+    {
+        u32 line = (2047 + dxt) / dxt;
+        u32 bpl = line << 3;
+        u32 height = bytes / bpl;
+
+        if (gDP.loadTile->size == G_IM_SIZ_32b)
+        {
+            for (u32 y = 0; y < height; y++)
+            {
+                UnswapCopy( src, dest, bpl );
+                if (y & 1) QWordInterleave( dest, line );
+                src += line;
+                dest += line;
+            }
+        }
+        else
+        {
+            for (u32 y = 0; y < height; y++)
+            {
+                UnswapCopy( src, dest, bpl );
+                if (y & 1) DWordInterleave( dest, line );
+                src += line;
+                dest += line;
+            }
+
+        }
+
+    }
+    else
+        UnswapCopy( src, dest, bytes );
+
+    gDP.textureMode = TEXTUREMODE_NORMAL;
+    gDP.loadType = LOADTYPE_BLOCK;
+    gDP.changed |= CHANGED_TMEM;
+
+#ifdef DEBUG
+    DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_TEXTURE, "gDPLoadBlock( %u, %u, %u, %u, %u );\n",
+        tile, uls, ult, lrs, dxt );
+#endif
+}
+
+void gDPLoadTLUT( u32 tile, u32 uls, u32 ult, u32 lrs, u32 lrt )
+{
+    gDPSetTileSize( tile, uls, ult, lrs, lrt );
+
+    u16 count = (gDP.tiles[tile].lrs - gDP.tiles[tile].uls + 1) * (gDP.tiles[tile].lrt - gDP.tiles[tile].ult + 1);
+    u32 address = gDP.textureImage.address + gDP.tiles[tile].ult * gDP.textureImage.bpl + (gDP.tiles[tile].uls << gDP.textureImage.size >> 1);
+
+    u16 *dest = (u16*)&TMEM[gDP.tiles[tile].tmem];
+    u16 *src = (u16*)&RDRAM[address];
+
+    u16 pal = (gDP.tiles[tile].tmem - 256) >> 4;
+
+    int i = 0;
+    while (i < count)
+    {
+        for (u16 j = 0; (j < 16) && (i < count); j++, i++)
+        {
+            u16 color = swapword( src[i^1] );
+
+            *dest = color;
+            //dest[1] = color;
+            //dest[2] = color;
+            //dest[3] = color;
+
+            dest += 4;
+        }
+
+        gDP.paletteCRC16[pal] = CRC_CalculatePalette( 0xFFFFFFFF, &TMEM[256 + (pal << 4)], 16 );
+        pal++;
+    }
+
+    gDP.paletteCRC256 = CRC_Calculate( 0xFFFFFFFF, gDP.paletteCRC16, 64 );
+
+    gDP.changed |= CHANGED_TMEM;
+
+#ifdef DEBUG
+    DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_TEXTURE, "gDPLoadTLUT( %u, %i, %i, %i, %i );\n",
+        tile, gDP.tiles[tile].uls, gDP.tiles[tile].ult, gDP.tiles[tile].lrs, gDP.tiles[tile].lrt );
+#endif
+}
+
+void gDPSetScissor( u32 mode, f32 ulx, f32 uly, f32 lrx, f32 lry )
+{
+    gDP.scissor.mode = mode;
+    gDP.scissor.ulx = ulx;
+    gDP.scissor.uly = uly;
+    gDP.scissor.lrx = lrx;
+    gDP.scissor.lry = lry;
+    gDP.changed |= CHANGED_SCISSOR;
+
+#ifdef DEBUG
+    DebugMsg( DEBUG_HIGH | DEBUG_IGNORED, "gDPSetScissor( %s, %.2f, %.2f, %.2f, %.2f );\n",
+        ScissorModeText[gDP.scissor.mode],
+        gDP.scissor.ulx,
+        gDP.scissor.uly,
+        gDP.scissor.lrx,
+        gDP.scissor.lry );
+#endif
+}
+
+void gDPFillRectangle( s32 ulx, s32 uly, s32 lrx, s32 lry )
+{
+    DepthBuffer *buffer = DepthBuffer_FindBuffer( gDP.colorImage.address );
+
+    if (buffer)
+        buffer->cleared = TRUE;
+
+    if (gDP.depthImageAddress == gDP.colorImage.address)
+    {
+        OGL_ClearDepthBuffer();
+        return;
+    }
+
+    if (gDP.otherMode.cycleType == G_CYC_FILL)
+    {
+        lrx++;
+        lry++;
+
+        if ((ulx == 0) && (uly == 0) && ((unsigned int)lrx == VI.width) && ((unsigned int)lry == VI.height))
+        {
+            OGL_ClearColorBuffer( &gDP.fillColor.r );
+            return;
+        }
+    }
+
+    //shouldn't this be primitive color?
+    //OGL_DrawRect( ulx, uly, lrx, lry, (gDP.otherMode.cycleType == G_CYC_FILL) ? &gDP.fillColor.r : &gDP.blendColor.r );
+    //OGL_DrawRect( ulx, uly, lrx, lry, (gDP.otherMode.cycleType == G_CYC_FILL) ? &gDP.fillColor.r : &gDP.primColor.r);
+
+    float black[] = {0,0,0,0};
+    OGL_DrawRect( ulx, uly, lrx, lry, (gDP.otherMode.cycleType == G_CYC_FILL) ? &gDP.fillColor.r : black);
+
+    if (depthBuffer.current) depthBuffer.current->cleared = FALSE;
+    gDP.colorImage.changed = TRUE;
+    gDP.colorImage.height = max( gDP.colorImage.height, (unsigned int)lry );
+
+#ifdef DEBUG
+    DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gDPFillRectangle( %i, %i, %i, %i );\n",
+        ulx, uly, lrx, lry );
+#endif
+}
+
+// Stores the six conversion coefficients k0..k5 in gDP.convert, each scaled by
+// 1/255, and flags CHANGED_CONVERT.
+void gDPSetConvert( s32 k0, s32 k1, s32 k2, s32 k3, s32 k4, s32 k5 )
+{
+    const f32 inv255 = 0.0039215689f;
+
+    gDP.convert.k0 = k0 * inv255;
+    gDP.convert.k1 = k1 * inv255;
+    gDP.convert.k2 = k2 * inv255;
+    gDP.convert.k3 = k3 * inv255;
+    gDP.convert.k4 = k4 * inv255;
+    gDP.convert.k5 = k5 * inv255;
+
+    gDP.changed |= CHANGED_CONVERT;
+}
+
+// Stores the red channel of the key (center, scale, width), each scaled by 1/255.
+void gDPSetKeyR( u32 cR, u32 sR, u32 wR )
+{
+    const f32 inv255 = 0.0039215689f;
+
+    gDP.key.center.r = cR * inv255;
+    gDP.key.scale.r = sR * inv255;
+    gDP.key.width.r = wR * inv255;
+}
+
+// Stores the green and blue channels of the key (center, scale, width), each
+// scaled by 1/255.
+void gDPSetKeyGB(u32 cG, u32 sG, u32 wG, u32 cB, u32 sB, u32 wB )
+{
+    const f32 inv255 = 0.0039215689f;
+
+    // green
+    gDP.key.center.g = cG * inv255;
+    gDP.key.scale.g = sG * inv255;
+    gDP.key.width.g = wG * inv255;
+
+    // blue
+    gDP.key.center.b = cB * inv255;
+    gDP.key.scale.b = sB * inv255;
+    gDP.key.width.b = wB * inv255;
+}
+
+// Draws a textured screen-space rectangle using tile `tile`.
+//
+// ulx/uly/lrx/lry are the rectangle corners, s/t the texture coordinates at the
+// upper-left corner, and dsdx/dtdy the texture coordinate steps per pixel.
+// Nothing is drawn when the color image is the depth image.
+// The same routine also serves the flipped variant: it inspects RSP.cmd and,
+// for G_TEXRECTFLIP, derives the lower-right s from the rectangle height and
+// the lower-right t from the rectangle width.
+// NOTE(review): tile is used as an index into gDP.tiles without a range check.
+void gDPTextureRectangle( f32 ulx, f32 uly, f32 lrx, f32 lry, s32 tile, f32 s, f32 t, f32 dsdx, f32 dtdy )
+{
+    if (gDP.colorImage.address == gDP.depthImageAddress)
+    {
+        return;
+    }
+
+    // Copy cycle mode: force a 1:1 horizontal step and grow the rectangle by
+    // one pixel on each axis.
+    if (gDP.otherMode.cycleType == G_CYC_COPY)
+    {
+        dsdx = 1.0f;
+        lrx += 1.0f;
+        lry += 1.0f;
+    }
+
+    // Temporarily bind the requested tile and the one after it (tile 7 is
+    // paired with itself); restored after the draw below.
+    gSP.textureTile[0] = &gDP.tiles[tile];
+    gSP.textureTile[1] = &gDP.tiles[(tile < 7) ? (tile + 1) : tile];
+
+
+    // Texture coordinates at the lower-right corner.
+    f32 lrs;
+    f32 lrt;
+    if (RSP.cmd == G_TEXRECTFLIP)
+    {
+        lrs = s + (lry - uly - 1) * dtdy;
+        lrt = t + (lrx - ulx - 1) * dsdx;
+    }
+    else
+    {
+        lrs = s + (lrx - ulx - 1) * dsdx;
+        lrt = t + (lry - uly - 1) * dtdy;
+    }
+
+    if (gDP.textureMode == TEXTUREMODE_NORMAL)
+        gDP.textureMode = TEXTUREMODE_TEXRECT;
+
+    gDP.texRect.width = (unsigned int)(max( lrs, s ) + dsdx);
+    gDP.texRect.height = (unsigned int)(max( lrt, t ) + dtdy);
+
+    // If a texture coordinate decreases across the rectangle, swap both the
+    // screen edges and the texture coordinates on that axis so that s <= lrs
+    // and t <= lrt when drawing.
+    float tmp;
+    if (lrs < s)
+    {
+        tmp = ulx; ulx = lrx; lrx = tmp;
+        tmp = s; s = lrs; lrs = tmp;
+    }
+    if (lrt < t)
+    {
+        tmp = uly; uly = lry; lry = tmp;
+        tmp = t; t = lrt; lrt = tmp;
+    }
+
+    OGL_DrawTexturedRect( ulx, uly, lrx, lry, s, t, lrs, lrt, (RSP.cmd == G_TEXRECTFLIP));
+
+    // Restore the tile binding selected by gSP.texture.tile.
+    gSP.textureTile[0] = &gDP.tiles[gSP.texture.tile];
+    gSP.textureTile[1] = &gDP.tiles[(gSP.texture.tile < 7) ? (gSP.texture.tile + 1) : gSP.texture.tile];
+
+    if (depthBuffer.current) depthBuffer.current->cleared = FALSE;
+    gDP.colorImage.changed = TRUE;
+    // The color image height grows to at least the scissor's lower edge.
+    gDP.colorImage.height = (unsigned int)(max( gDP.colorImage.height, gDP.scissor.lry ));
+
+#ifdef DEBUG
+    // Note: logs the coordinates after any swap performed above.
+    DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gDPTextureRectangle( %f, %f, %f, %f, %i, %f, %f, %f, %f );\n",
+        ulx, uly, lrx, lry, tile, s, t, dsdx, dtdy );
+#endif
+}
+
+void gDPTextureRectangleFlip( f32 ulx, f32 uly, f32 lrx, f32 lry, s32 tile, f32 s, f32 t, f32 dsdx, f32 dtdy )
+{
+    //gDPTextureRectangle( ulx, uly, lrx, lry, tile, s + (lrx - ulx) * dsdx, t + (lry - uly) * dtdy, -dsdx, -dtdy );
+
+    gDPTextureRectangle( ulx, uly, lrx, lry, tile, s, t, dsdx, dtdy );
+#ifdef DEBUG
+    DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gDPTextureRectangleFlip( %f, %f, %f, %f, %i, %f, %f, %f, %f);\n",
+        ulx, uly, lrx, lry, tile, s, t, dsdx, dtdy );
+#endif
+}
+
+// Full sync: sets the MI_INTR_DP bit in the MI_INTR register and calls
+// CheckInterrupts().
+void gDPFullSync()
+{
+    *REG.MI_INTR |= MI_INTR_DP;
+
+    CheckInterrupts();
+
+#ifdef DEBUG
+    DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gDPFullSync();\n" );
+#endif
+}
+
+// Tile sync: no state change; only logged (as ignored) in DEBUG builds.
+void gDPTileSync()
+{
+#ifdef DEBUG
+    DebugMsg( DEBUG_HIGH | DEBUG_IGNORED | DEBUG_TEXTURE, "gDPTileSync();\n" );
+#endif
+}
+
+// Pipe sync: no state change; only logged (as ignored) in DEBUG builds.
+void gDPPipeSync()
+{
+#ifdef DEBUG
+    DebugMsg( DEBUG_HIGH | DEBUG_IGNORED, "gDPPipeSync();\n" );
+#endif
+}
+
+// Load sync: no state change; only logged (as ignored) in DEBUG builds.
+void gDPLoadSync()
+{
+#ifdef DEBUG
+    DebugMsg( DEBUG_HIGH | DEBUG_IGNORED, "gDPLoadSync();\n" );
+#endif
+}
+
+// No-op command: no state change; only logged (as ignored) in DEBUG builds.
+void gDPNoOp()
+{
+#ifdef DEBUG
+    DebugMsg( DEBUG_HIGH | DEBUG_IGNORED, "gDPNoOp();\n" );
+#endif
+}
+
diff --git a/source/gles2n64/src/gDP.h b/source/gles2n64/src/gDP.h
new file mode 100644 (file)
index 0000000..e3d8125
--- /dev/null
@@ -0,0 +1,289 @@
+#ifndef GDP_H
+#define GDP_H
+
+#include "Types.h"
+
+#define CHANGED_RENDERMODE      0x0001
+#define CHANGED_CYCLETYPE       0x0002
+#define CHANGED_SCISSOR         0x0004
+#define CHANGED_TMEM            0x0008
+#define CHANGED_TILE            0x0010
+#define CHANGED_COMBINE_COLORS  0x0020
+#define CHANGED_COMBINE         0x0040
+#define CHANGED_ALPHACOMPARE    0x0080
+#define CHANGED_FOGCOLOR        0x0100
+#define CHANGED_DEPTHSOURCE     0x0200
+#define CHANGED_PRIMITIVEZ      0x0400
+#define CHANGED_ENV_COLOR       0x0800
+#define CHANGED_PRIM_COLOR      0x1000
+#define CHANGED_BLENDCOLOR      0x2000
+#define CHANGED_CONVERT         0x4000
+
+#define TEXTUREMODE_NORMAL      0
+#define TEXTUREMODE_TEXRECT     1
+#define TEXTUREMODE_BGIMAGE     2
+#define TEXTUREMODE_FRAMEBUFFER 3
+
+#define LOADTYPE_BLOCK          0
+#define LOADTYPE_TILE           1
+
+// Combiner mux state. The anonymous union exposes the same 64 bits three ways: as named
+// bit-fields, as the two 32-bit words muxs1/muxs0, and as the single 64-bit value mux.
+// NOTE(review): which bit-field lands on which bit of muxs1/muxs0 depends on the compiler's
+// bit-field allocation order; the layout below presumably assumes a little-endian,
+// LSB-first ABI -- confirm for new targets.
+struct gDPCombine
+{
+    union
+    {
+        struct
+        {
+            // muxs1 (these ten fields add up to 32 bits)
+            unsigned    aA1     : 3;
+            unsigned    sbA1    : 3;
+            unsigned    aRGB1   : 3;
+            unsigned    aA0     : 3;
+            unsigned    sbA0    : 3;
+            unsigned    aRGB0   : 3;
+            unsigned    mA1     : 3;
+            unsigned    saA1    : 3;
+            unsigned    sbRGB1  : 4;
+            unsigned    sbRGB0  : 4;
+
+            // muxs0 (these six fields add up to 24 bits; the remaining bits are unnamed)
+            unsigned    mRGB1   : 5;
+            unsigned    saRGB1  : 4;
+            unsigned    mA0     : 3;
+            unsigned    saA0    : 3;
+            unsigned    mRGB0   : 5;
+            unsigned    saRGB0  : 4;
+        };
+
+        // Word-wise view of the same storage.
+        struct
+        {
+            u32         muxs1, muxs0;
+        };
+
+        // Whole-value view of the same storage.
+        u64             mux;
+    };
+};
+
+// Per-tile descriptor state; gDPInfo holds eight of these in its tiles[] array.
+struct gDPTile
+{
+    u32 format, size, line, tmem, palette;
+
+    // Clamp/mirror flags, viewable either as single-bit fields or as the whole words
+    // cmt / cms. NOTE(review): mirrort/clampt overlaying the low bits of cmt (and
+    // mirrors/clamps those of cms) relies on compiler bit-field ordering -- confirm.
+    union
+    {
+        struct
+        {
+            unsigned    mirrort : 1;
+            unsigned    clampt  : 1;
+            unsigned    pad0    : 30;
+
+            unsigned    mirrors : 1;
+            unsigned    clamps  : 1;
+            unsigned    pad1    : 30;
+        };
+
+        struct
+        {
+            u32 cmt, cms;
+        };
+    };
+
+    //FrameBuffer *frameBuffer;
+    u32 maskt, masks;
+    u32 shiftt, shifts;
+    // Tile bounds, kept both as floats (f-prefixed) and as integers.
+    f32 fuls, fult, flrs, flrt;
+    u32 uls, ult, lrs, lrt;
+};
+
+// Aggregate state for the gDP layer; the single global instance is declared below as
+// `extern gDPInfo gDP`.
+struct gDPInfo
+{
+    // "Other mode" word pair, viewable as named bit-fields, as one 64-bit value (_u64), or
+    // as the two 32-bit words l and h.
+    // NOTE(review): the mapping of bit-fields onto l/h depends on compiler bit-field
+    // ordering; presumably an LSB-first little-endian ABI is assumed -- confirm.
+    struct
+    {
+        union
+        {
+            struct
+            {
+                // The fields from alphaCompare through c1_m1a add up to 32 bits.
+                unsigned int alphaCompare : 2;
+                unsigned int depthSource : 1;
+
+//              struct
+//              {
+                    unsigned int AAEnable : 1;
+                    unsigned int depthCompare : 1;
+                    unsigned int depthUpdate : 1;
+                    unsigned int imageRead : 1;
+                    unsigned int clearOnCvg : 1;
+
+                    unsigned int cvgDest : 2;
+                    unsigned int depthMode : 2;
+
+                    unsigned int cvgXAlpha : 1;
+                    unsigned int alphaCvgSel : 1;
+                    unsigned int forceBlender : 1;
+                    unsigned int textureEdge : 1;
+//              } renderMode;
+
+                //struct
+                //{
+                    unsigned int c2_m2b : 2;
+                    unsigned int c1_m2b : 2;
+                    unsigned int c2_m2a : 2;
+                    unsigned int c1_m2a : 2;
+                    unsigned int c2_m1b : 2;
+                    unsigned int c1_m1b : 2;
+                    unsigned int c2_m1a : 2;
+                    unsigned int c1_m1a : 2;
+                //} blender;
+
+                // The fields from blendMask through pad add up to another 32 bits.
+                unsigned int blendMask : 4;
+                unsigned int alphaDither : 2;
+                unsigned int colorDither : 2;
+
+                unsigned int combineKey : 1;
+                unsigned int textureConvert : 3;
+                unsigned int textureFilter : 2;
+                unsigned int textureLUT : 2;
+
+                unsigned int textureLOD : 1;
+                unsigned int textureDetail : 2;
+                unsigned int texturePersp : 1;
+                unsigned int cycleType : 2;
+                unsigned int unusedColorDither : 1; // unsupported
+                unsigned int pipelineMode : 1;
+
+                unsigned int pad : 8;
+
+            };
+
+            u64         _u64;
+
+            struct
+            {
+                u32         l, h;
+            };
+        };
+    } otherMode;
+
+    gDPCombine combine;
+
+    // The eight tile descriptors, plus a pointer named loadTile.
+    gDPTile tiles[8], *loadTile;
+
+    struct
+    {
+        f32 r, g, b, a;
+    } fogColor,  blendColor, envColor;
+
+    struct
+    {
+        unsigned int i;
+        f32 r, g, b, a;
+        f32 z, dz;
+    } fillColor;
+
+    struct
+    {
+        u32 m;
+        f32 l, r, g, b, a;
+    } primColor;
+
+    struct
+    {
+        f32 z, deltaZ;
+    } primDepth;
+
+    struct
+    {
+        u32 format, size, width, bpl;
+        u32 address;
+    } textureImage;
+
+    // `changed` is set to TRUE and `height` is raised to at least scissor.lry by the
+    // drawing paths in gDP.cpp and gSP.cpp (texture rectangles, triangles).
+    struct
+    {
+        u32 format, size, width, height, bpl;
+        u32 address, changed;
+        u32 depthImage;
+    } colorImage;
+
+    u32 depthImageAddress;
+
+    struct
+    {
+        u32 mode;
+        f32 ulx, uly, lrx, lry;
+    } scissor;
+
+    struct
+    {
+        f32 k0, k1, k2, k3, k4, k5;
+    } convert;
+
+    struct
+    {
+        struct
+        {
+            f32 r, g, b, a;
+        } center, scale, width;
+    } key;
+
+    struct
+    {
+        u32 width, height;
+    } texRect;
+
+    // Bit mask built from the CHANGED_* flags defined at the top of this header.
+    u32 changed;
+
+    //u16 palette[256];
+    u32 paletteCRC16[16];
+    u32 paletteCRC256;
+    u32 half_1, half_2;
+    u32 textureMode;    // one of the TEXTUREMODE_* values
+    u32 loadType;       // one of the LOADTYPE_* values
+};
+
+extern gDPInfo gDP;
+
+void gDPSetOtherMode( u32 mode0, u32 mode1 );
+void gDPSetPrimDepth( u16 z, u16 dz );
+void gDPPipelineMode( u32 mode );
+void gDPSetCycleType( u32 type );
+void gDPSetTexturePersp( u32 enable );
+void gDPSetTextureDetail( u32 type );
+void gDPSetTextureLOD( u32 mode );
+void gDPSetTextureLUT( u32 mode );
+void gDPSetTextureFilter( u32 type );
+void gDPSetTextureConvert( u32 type );
+void gDPSetCombineKey( u32 type );
+void gDPSetColorDither( u32 type );
+void gDPSetAlphaDither( u32 type );
+void gDPSetAlphaCompare( u32 mode );
+void gDPSetDepthSource( u32 source );
+void gDPSetRenderMode( u32 mode1, u32 mode2 );
+void gDPSetCombine( s32 muxs0, s32 muxs1 );
+void gDPSetColorImage( u32 format, u32 size, u32 width, u32 address );
+void gDPSetTextureImage( u32 format, u32 size, u32 width, u32 address );
+void gDPSetDepthImage( u32 address );
+void gDPSetEnvColor( u32 r, u32 g, u32 b, u32 a );
+void gDPSetBlendColor( u32 r, u32 g, u32 b, u32 a );
+void gDPSetFogColor( u32 r, u32 g, u32 b, u32 a );
+void gDPSetFillColor( u32 c );
+void gDPSetPrimColor( u32 m, u32 l, u32 r, u32 g, u32 b, u32 a );
+void gDPSetTile(u32 format, const u32 size, const u32 line, const u32 tmem, u32 tile,
+               const u32 palette, const u32 cmt, const u32 cms, const u32 maskt, const u32 masks,
+               const u32 shiftt, const u32 shifts );
+void gDPSetTileSize( u32 tile, u32 uls, u32 ult, u32 lrs, u32 lrt );
+void gDPLoadTile( u32 tile, u32 uls, u32 ult, u32 lrs, u32 lrt );
+void gDPLoadBlock( u32 tile, u32 uls, u32 ult, u32 lrs, u32 dxt );
+void gDPLoadTLUT( u32 tile, u32 uls, u32 ult, u32 lrs, u32 lrt );
+void gDPSetScissor( u32 mode, f32 ulx, f32 uly, f32 lrx, f32 lry );
+void gDPFillRectangle( s32 ulx, s32 uly, s32 lrx, s32 lry );
+void gDPSetConvert( s32 k0, s32 k1, s32 k2, s32 k3, s32 k4, s32 k5 );
+void gDPSetKeyR( u32 cR, u32 sR, u32 wR );
+void gDPSetKeyGB(u32 cG, u32 sG, u32 wG, u32 cB, u32 sB, u32 wB );
+void gDPTextureRectangle( f32 ulx, f32 uly, f32 lrx, f32 lry, s32 tile, f32 s, f32 t, f32 dsdx, f32 dtdy );
+void gDPTextureRectangleFlip( f32 ulx, f32 uly, f32 lrx, f32 lry, s32 tile, f32 s, f32 t, f32 dsdx, f32 dtdy );
+void gDPFullSync();
+void gDPTileSync();
+void gDPPipeSync();
+void gDPLoadSync();
+void gDPNoOp();
+
+#endif
+
diff --git a/source/gles2n64/src/gSP.cpp b/source/gles2n64/src/gSP.cpp
new file mode 100644 (file)
index 0000000..2b845f9
--- /dev/null
@@ -0,0 +1,1738 @@
+#include <math.h>
+#include <stdlib.h>
+
+#include "Common.h"
+#include "gles2N64.h"
+#include "Debug.h"
+#include "Types.h"
+#include "RSP.h"
+#include "GBI.h"
+#include "gSP.h"
+#include "gDP.h"
+#include "3DMath.h"
+#include "OpenGL.h"
+#include "CRC.h"
+#include <string.h>
+#include "convert.h"
+#include "S2DEX.h"
+#include "VI.h"
+#include "DepthBuffer.h"
+#include "Config.h"
+
+//Note: 0xC0 is used by 1080 a lot; it's an unknown command.
+
+#ifdef DEBUG
+extern u32 uc_crc, uc_dcrc;
+extern char uc_str[256];
+#endif
+
+void gSPCombineMatrices();
+
+//#ifdef __TRIBUFFER_OPT
+// Resets the index map to the identity mapping and marks every vertex-buffer slot unused.
+void __indexmap_init()
+{
+    // 0xFF in every byte makes each u32 entry 0xFFFFFFFF, the value
+    // __indexmap_findunused() treats as "slot unused".
+    memset(OGL.triangles.indexmapinv, 0xFF, sizeof(u32) * VERTBUFF_SIZE);
+
+    int slot = 0;
+    while (slot < INDEXMAP_SIZE)
+    {
+        OGL.triangles.indexmap[slot] = slot;
+        ++slot;
+    }
+
+    OGL.triangles.indexmap_nomap = 0;
+    OGL.triangles.indexmap_prev = -1;
+}
+
+// Rebuilds the inverse map from the forward map: every slot is first marked unused
+// (0xFFFFFFFF), then each slot referenced by indexmap[] is pointed back at its index.
+void __indexmap_clear()
+{
+    memset(OGL.triangles.indexmapinv, 0xFF, sizeof(u32) * VERTBUFF_SIZE);
+
+    int idx = 0;
+    while (idx < INDEXMAP_SIZE)
+    {
+        OGL.triangles.indexmapinv[OGL.triangles.indexmap[idx]] = idx;
+        ++idx;
+    }
+}
+
+// Searches indexmapinv[] for `num` consecutive unused slots (entries equal to 0xFFFFFFFF).
+// The scan starts just after indexmap_prev (clamped to the last slot), wraps to slot 0 at
+// the end of the buffer, and examines at most VERTBUFF_SIZE slots.
+// Returns the first slot of the run, or 0xFFFFFFFF if no run was found.
+u32 __indexmap_findunused(u32 num)
+{
+    u32 c = 0;  // length of the current run of unused slots
+    u32 i = min(OGL.triangles.indexmap_prev+1, VERTBUFF_SIZE-1);  // slot being examined
+    u32 n = 0;  // number of slots examined so far
+    while(n < VERTBUFF_SIZE)
+    {
+        c = (OGL.triangles.indexmapinv[i] == 0xFFFFFFFF) ? (c+1) : 0;
+        // A run is only accepted while its last slot i is below VERTBUFF_SIZE - num.
+        if ((c == num) && (i < (VERTBUFF_SIZE - num)))
+        {
+            break;
+        }
+        i=i+1;
+        // Wrapping also resets the run length, so a run never straddles the wrap point.
+        if (i >= VERTBUFF_SIZE) {i=0; c=0;}
+        n++;
+    }
+    // NOTE(review): if the loop ends because n reached VERTBUFF_SIZE (no break) while c
+    // happens to equal num, i has already been advanced past the run, so i-num+1 looks
+    // like it is one slot beyond the run's start -- confirm whether this case can occur.
+    return (c == num) ? (i-num+1) : (0xFFFFFFFF);
+}
+
+void __indexmap_undomap()
+{
+    SPVertex tmp[INDEXMAP_SIZE];
+    memset(OGL.triangles.indexmapinv, 0xFF, VERTBUFF_SIZE * sizeof(u32));
+
+    for(int i=0;i<INDEXMAP_SIZE;i++)
+    {
+        u32 ind = OGL.triangles.indexmap[i];
+        tmp[i] = OGL.triangles.vertices[ind];
+        OGL.triangles.indexmap[i] = i;
+        OGL.triangles.indexmapinv[i] = i;
+    }
+
+    memcpy(OGL.triangles.vertices, tmp, INDEXMAP_SIZE * sizeof(SPVertex));
+    OGL.triangles.indexmap_nomap = 1;
+}
+
+// Allocates `num` consecutive vertex-buffer slots for the vertex indices
+// index..index+num-1, records the mapping in indexmap[]/indexmapinv[], and returns the
+// first slot.
+// If no free run is found, it retries after OGL_DrawTriangles() and then after
+// __indexmap_undomap(). In that last-resort branch the result of the final search is
+// returned as-is -- 0xFFFFFFFF on failure -- without updating the maps. The visible
+// caller (gSPVertex) uses the return value directly as a vertices[] index.
+u32 __indexmap_getnew(u32 index, u32 num)
+{
+    u32 ind;
+
+    //test to see if unmapped
+    // NOTE(review): this inspects indexmap[0..num-1], not indexmap[index..index+num-1], and
+    // the functions visible in this file only ever store slot numbers in indexmap[], so
+    // this looks like it can only succeed when num == 0 -- confirm the intent.
+    u32 unmapped = 1;
+    for(int i=0;i<num;i++)
+    {
+        if (OGL.triangles.indexmap[i]!=0xFFFFFFFF)
+        {
+            unmapped = 0;
+            break;
+        }
+
+    }
+
+    if (unmapped)
+    {
+        ind = index;
+    }
+    else
+    {
+        ind = __indexmap_findunused(num);
+
+        //no more room in buffer....
+        // Valid slots are below VERTBUFF_SIZE; the failure value 0xFFFFFFFF trips this test.
+        if (ind > VERTBUFF_SIZE)
+        {
+            // Draw the batched triangles, then search again.
+            OGL_DrawTriangles();
+            ind = __indexmap_findunused(num);
+
+            //OK the indices are spread so sparsely, we cannot find a num element block.
+            if (ind > VERTBUFF_SIZE)
+            {
+                __indexmap_undomap();
+                ind = __indexmap_findunused(num);
+                if (ind > VERTBUFF_SIZE)
+                {
+                    LOG(LOG_ERROR, "Could not allocate %i indices\n", num);
+
+                    LOG(LOG_VERBOSE, "indexmap=[");
+                    for(int i=0;i<INDEXMAP_SIZE;i++)
+                        LOG(LOG_VERBOSE, "%i,", OGL.triangles.indexmap[i]);
+                    LOG(LOG_VERBOSE, "]\n");
+
+                    LOG(LOG_VERBOSE, "indexmapinv=[");
+                    for(int i=0;i<VERTBUFF_SIZE;i++)
+                        LOG(LOG_VERBOSE, "%i,", OGL.triangles.indexmapinv[i]);
+                    LOG(LOG_VERBOSE, "]\n");
+                }
+                // NOTE(review): this return is taken even when the search above succeeded,
+                // so the mapping loop below is skipped and indexmap_nomap stays 1 --
+                // confirm this is intended.
+                return ind;
+            }
+        }
+    }
+
+    // Record the mapping in both directions.
+    for(int i=0;i<num;i++)
+    {
+        OGL.triangles.indexmap[index+i] = ind+i;
+        OGL.triangles.indexmapinv[ind+i] = index+i;
+    }
+
+    // The next search in __indexmap_findunused() starts right after this run.
+    OGL.triangles.indexmap_prev = ind+num-1;
+    OGL.triangles.indexmap_nomap = 0;
+
+    return ind;
+}
+//#endif
+
+void gSPTriangle(s32 v0, s32 v1, s32 v2)
+{
+    if ((v0 < INDEXMAP_SIZE) && (v1 < INDEXMAP_SIZE) && (v2 < INDEXMAP_SIZE))
+    {
+
+#ifdef __TRIBUFFER_OPT
+        v0 = OGL.triangles.indexmap[v0];
+        v1 = OGL.triangles.indexmap[v1];
+        v2 = OGL.triangles.indexmap[v2];
+#endif
+
+#if 0
+        // Don't bother with triangles completely outside clipping frustrum
+        if (config.enableClipping)
+        {
+            if (OGL.triangles.vertices[v0].clip & OGL.triangles.vertices[v1].clip & OGL.triangles.vertices[v2].clip)
+            {
+                return;
+            }
+        }
+#endif
+
+        OGL_AddTriangle(v0, v1, v2);
+
+    }
+
+    if (depthBuffer.current) depthBuffer.current->cleared = FALSE;
+    gDP.colorImage.changed = TRUE;
+    gDP.colorImage.height = (unsigned int)(max( gDP.colorImage.height, gDP.scissor.lry ));
+}
+
+// Queues a single triangle through gSPTriangle(), then calls gSPFlushTriangles().
+void gSP1Triangle( const s32 v0, const s32 v1, const s32 v2)
+{
+    gSPTriangle( v0, v1, v2);
+    gSPFlushTriangles();
+}
+
+// Queues two triangles through gSPTriangle(), then calls gSPFlushTriangles().
+// flag0 and flag1 are accepted but not used by this implementation.
+void gSP2Triangles(const s32 v00, const s32 v01, const s32 v02, const s32 flag0,
+                    const s32 v10, const s32 v11, const s32 v12, const s32 flag1 )
+{
+    gSPTriangle( v00, v01, v02);
+    gSPTriangle( v10, v11, v12);
+    gSPFlushTriangles();
+}
+
+// Queues four triangles through gSPTriangle(), in argument order, then calls
+// gSPFlushTriangles().
+void gSP4Triangles(const s32 v00, const s32 v01, const s32 v02,
+                    const s32 v10, const s32 v11, const s32 v12,
+                    const s32 v20, const s32 v21, const s32 v22,
+                    const s32 v30, const s32 v31, const s32 v32 )
+{
+    gSPTriangle(v00, v01, v02);
+    gSPTriangle(v10, v11, v12);
+    gSPTriangle(v20, v21, v22);
+    gSPTriangle(v30, v31, v32);
+    gSPFlushTriangles();
+}
+
+
+gSPInfo gSP;
+
+// 4x4 identity matrix; gSPDMAMatrix() copies it into gSP.matrix.projection.
+f32 identityMatrix[4][4] =
+{
+    { 1.0f, 0.0f, 0.0f, 0.0f },
+    { 0.0f, 1.0f, 0.0f, 0.0f },
+    { 0.0f, 0.0f, 1.0f, 0.0f },
+    { 0.0f, 0.0f, 0.0f, 1.0f }
+};
+
+#ifdef __VEC4_OPT
+static void gSPTransformVertex4_default(u32 v, float mtx[4][4])
+{
+    float x, y, z, w;
+    int i;
+    for(i = 0; i < 4; i++)
+    {
+        x = OGL.triangles.vertices[v+i].x;
+        y = OGL.triangles.vertices[v+i].y;
+        z = OGL.triangles.vertices[v+i].z;
+        w = OGL.triangles.vertices[v+i].w;
+        OGL.triangles.vertices[v+i].x = x * mtx[0][0] + y * mtx[1][0] + z * mtx[2][0] + mtx[3][0];
+        OGL.triangles.vertices[v+i].y = x * mtx[0][1] + y * mtx[1][1] + z * mtx[2][1] + mtx[3][1];
+        OGL.triangles.vertices[v+i].z = x * mtx[0][2] + y * mtx[1][2] + z * mtx[2][2] + mtx[3][2];
+        OGL.triangles.vertices[v+i].w = x * mtx[0][3] + y * mtx[1][3] + z * mtx[2][3] + mtx[3][3];
+    }
+}
+
+void gSPClipVertex4(u32 v)
+{
+    int i;
+    for(i = 0; i < 4; i++){
+        SPVertex *vtx = &OGL.triangles.vertices[v+i];
+        vtx->clip = 0;
+        if (vtx->x > +vtx->w)   vtx->clip |= CLIP_POSX;
+        if (vtx->x < -vtx->w)   vtx->clip |= CLIP_NEGX;
+        if (vtx->y > +vtx->w)   vtx->clip |= CLIP_POSY;
+        if (vtx->y < -vtx->w)   vtx->clip |= CLIP_NEGY;
+    }
+}
+
+static void gSPTransformNormal4_default(u32 v, float mtx[4][4])
+{
+    float len, x, y, z;
+    int i;
+    for(i = 0; i < 4; i++){
+        x = OGL.triangles.vertices[v+i].nx;
+        y = OGL.triangles.vertices[v+i].ny;
+        z = OGL.triangles.vertices[v+i].nz;
+
+        OGL.triangles.vertices[v+i].nx = mtx[0][0]*x + mtx[1][0]*y + mtx[2][0]*z;
+        OGL.triangles.vertices[v+i].ny = mtx[0][1]*x + mtx[1][1]*y + mtx[2][1]*z;
+        OGL.triangles.vertices[v+i].nz = mtx[0][2]*x + mtx[1][2]*y + mtx[2][2]*z;
+        len =   OGL.triangles.vertices[v+i].nx*OGL.triangles.vertices[v+i].nx +
+                OGL.triangles.vertices[v+i].ny*OGL.triangles.vertices[v+i].ny +
+                OGL.triangles.vertices[v+i].nz*OGL.triangles.vertices[v+i].nz;
+        if (len != 0.0)
+        {
+            len = sqrtf(len);
+            OGL.triangles.vertices[v+i].nx /= len;
+            OGL.triangles.vertices[v+i].ny /= len;
+            OGL.triangles.vertices[v+i].nz /= len;
+        }
+    }
+}
+
+static void gSPLightVertex4_default(u32 v)
+{
+    gSPTransformNormal4(v, gSP.matrix.modelView[gSP.matrix.modelViewi]);
+    for(int j = 0; j < 4; j++)
+    {
+        f32 r,g,b;
+        r = gSP.lights[gSP.numLights].r;
+        g = gSP.lights[gSP.numLights].g;
+        b = gSP.lights[gSP.numLights].b;
+
+        for (int i = 0; i < gSP.numLights; i++)
+        {
+            f32 intensity = DotProduct( &OGL.triangles.vertices[v+j].nx, &gSP.lights[i].x );
+            if (intensity < 0.0f) intensity = 0.0f;
+/*
+// paulscode, cause of the shader bug (not applying intensity to correct varriables)
+            OGL.triangles.vertices[v+j].r += gSP.lights[i].r * intensity;
+            OGL.triangles.vertices[v+j].g += gSP.lights[i].g * intensity;
+            OGL.triangles.vertices[v+j].b += gSP.lights[i].b * intensity;
+*/
+//// paulscode, shader bug-fix:
+            r += gSP.lights[i].r * intensity;
+            g += gSP.lights[i].g * intensity;
+            b += gSP.lights[i].b * intensity;
+////
+        }
+        OGL.triangles.vertices[v+j].r = min(1.0f, r);
+        OGL.triangles.vertices[v+j].g = min(1.0f, g);
+        OGL.triangles.vertices[v+j].b = min(1.0f, b);
+    }
+}
+
+static void gSPBillboardVertex4_default(u32 v)
+{
+
+    int i = 0;
+#ifdef __TRIBUFFER_OPT
+    i = OGL.triangles.indexmap[0];
+#endif
+
+    OGL.triangles.vertices[v].x += OGL.triangles.vertices[i].x;
+    OGL.triangles.vertices[v].y += OGL.triangles.vertices[i].y;
+    OGL.triangles.vertices[v].z += OGL.triangles.vertices[i].z;
+    OGL.triangles.vertices[v].w += OGL.triangles.vertices[i].w;
+    OGL.triangles.vertices[v+1].x += OGL.triangles.vertices[i].x;
+    OGL.triangles.vertices[v+1].y += OGL.triangles.vertices[i].y;
+    OGL.triangles.vertices[v+1].z += OGL.triangles.vertices[i].z;
+    OGL.triangles.vertices[v+1].w += OGL.triangles.vertices[i].w;
+    OGL.triangles.vertices[v+2].x += OGL.triangles.vertices[i].x;
+    OGL.triangles.vertices[v+2].y += OGL.triangles.vertices[i].y;
+    OGL.triangles.vertices[v+2].z += OGL.triangles.vertices[i].z;
+    OGL.triangles.vertices[v+2].w += OGL.triangles.vertices[i].w;
+    OGL.triangles.vertices[v+3].x += OGL.triangles.vertices[i].x;
+    OGL.triangles.vertices[v+3].y += OGL.triangles.vertices[i].y;
+    OGL.triangles.vertices[v+3].z += OGL.triangles.vertices[i].z;
+    OGL.triangles.vertices[v+3].w += OGL.triangles.vertices[i].w;
+}
+
+void gSPProcessVertex4(u32 v)
+{
+    if (gSP.changed & CHANGED_MATRIX)
+        gSPCombineMatrices();
+
+    gSPTransformVertex4(v, gSP.matrix.combined );
+
+    if (config.screen.flipVertical)
+    {
+        OGL.triangles.vertices[v+0].y = -OGL.triangles.vertices[v+0].y;
+        OGL.triangles.vertices[v+1].y = -OGL.triangles.vertices[v+1].y;
+        OGL.triangles.vertices[v+2].y = -OGL.triangles.vertices[v+2].y;
+        OGL.triangles.vertices[v+3].y = -OGL.triangles.vertices[v+3].y;
+    }
+
+    if (gDP.otherMode.depthSource)
+    {
+        OGL.triangles.vertices[v+0].z = gDP.primDepth.z * OGL.triangles.vertices[v+0].w;
+        OGL.triangles.vertices[v+1].z = gDP.primDepth.z * OGL.triangles.vertices[v+1].w;
+        OGL.triangles.vertices[v+2].z = gDP.primDepth.z * OGL.triangles.vertices[v+2].w;
+        OGL.triangles.vertices[v+3].z = gDP.primDepth.z * OGL.triangles.vertices[v+3].w;
+    }
+
+    if (gSP.matrix.billboard)
+        gSPBillboardVertex4(v);
+
+    if (!(gSP.geometryMode & G_ZBUFFER))
+    {
+        OGL.triangles.vertices[v].z = -OGL.triangles.vertices[v].w;
+        OGL.triangles.vertices[v+1].z = -OGL.triangles.vertices[v+1].w;
+        OGL.triangles.vertices[v+2].z = -OGL.triangles.vertices[v+2].w;
+        OGL.triangles.vertices[v+3].z = -OGL.triangles.vertices[v+3].w;
+    }
+
+    if (gSP.geometryMode & G_LIGHTING)
+    {
+        if (config.enableLighting)
+        {
+            gSPLightVertex4(v);
+        }
+        else
+        {
+            OGL.triangles.vertices[v].r = 1.0f;
+            OGL.triangles.vertices[v].g = 1.0f;
+            OGL.triangles.vertices[v].b = 1.0f;
+            OGL.triangles.vertices[v+1].r = 1.0f;
+            OGL.triangles.vertices[v+1].g = 1.0f;
+            OGL.triangles.vertices[v+1].b = 1.0f;
+            OGL.triangles.vertices[v+2].r = 1.0f;
+            OGL.triangles.vertices[v+2].g = 1.0f;
+            OGL.triangles.vertices[v+2].b = 1.0f;
+            OGL.triangles.vertices[v+3].r = 1.0f;
+            OGL.triangles.vertices[v+3].g = 1.0f;
+            OGL.triangles.vertices[v+3].b = 1.0f;
+        }
+
+        if (gSP.geometryMode & G_TEXTURE_GEN)
+        {
+            gSPTransformNormal4(v, gSP.matrix.projection);
+
+            if (gSP.geometryMode & G_TEXTURE_GEN_LINEAR)
+            {
+                OGL.triangles.vertices[v].s = acosf(OGL.triangles.vertices[v].nx) * 325.94931f;
+                OGL.triangles.vertices[v].t = acosf(OGL.triangles.vertices[v].ny) * 325.94931f;
+                OGL.triangles.vertices[v+1].s = acosf(OGL.triangles.vertices[v+1].nx) * 325.94931f;
+                OGL.triangles.vertices[v+1].t = acosf(OGL.triangles.vertices[v+1].ny) * 325.94931f;
+                OGL.triangles.vertices[v+2].s = acosf(OGL.triangles.vertices[v+2].nx) * 325.94931f;
+                OGL.triangles.vertices[v+2].t = acosf(OGL.triangles.vertices[v+2].ny) * 325.94931f;
+                OGL.triangles.vertices[v+3].s = acosf(OGL.triangles.vertices[v+3].nx) * 325.94931f;
+                OGL.triangles.vertices[v+3].t = acosf(OGL.triangles.vertices[v+3].ny) * 325.94931f;
+            }
+            else // G_TEXTURE_GEN
+            {
+                OGL.triangles.vertices[v].s = (OGL.triangles.vertices[v].nx + 1.0f) * 512.0f;
+                OGL.triangles.vertices[v].t = (OGL.triangles.vertices[v].ny + 1.0f) * 512.0f;
+                OGL.triangles.vertices[v+1].s = (OGL.triangles.vertices[v+1].nx + 1.0f) * 512.0f;
+                OGL.triangles.vertices[v+1].t = (OGL.triangles.vertices[v+1].ny + 1.0f) * 512.0f;
+                OGL.triangles.vertices[v+2].s = (OGL.triangles.vertices[v+2].nx + 1.0f) * 512.0f;
+                OGL.triangles.vertices[v+2].t = (OGL.triangles.vertices[v+2].ny + 1.0f) * 512.0f;
+                OGL.triangles.vertices[v+3].s = (OGL.triangles.vertices[v+3].nx + 1.0f) * 512.0f;
+                OGL.triangles.vertices[v+3].t = (OGL.triangles.vertices[v+3].ny + 1.0f) * 512.0f;
+            }
+        }
+    }
+
+    if (config.enableClipping) gSPClipVertex4(v);
+}
+#endif
+
+// Recomputes the clip flags of vertex v: a CLIP_* bit is set for every side on which x
+// or y lies outside the range [-w, +w]. No test against w itself is performed.
+void gSPClipVertex(u32 v)
+{
+    SPVertex *const p = &OGL.triangles.vertices[v];
+
+    p->clip = 0;
+    if (p->x > +p->w) { p->clip |= CLIP_POSX; }
+    if (p->x < -p->w) { p->clip |= CLIP_NEGX; }
+    if (p->y > +p->w) { p->clip |= CLIP_POSY; }
+    if (p->y < -p->w) { p->clip |= CLIP_NEGY; }
+}
+
+static void gSPTransformVertex_default(float vtx[4], float mtx[4][4])
+{
+    float x, y, z, w;
+    x = vtx[0];
+    y = vtx[1];
+    z = vtx[2];
+    w = vtx[3];
+
+    vtx[0] = x * mtx[0][0] + y * mtx[1][0] + z * mtx[2][0] + mtx[3][0];
+    vtx[1] = x * mtx[0][1] + y * mtx[1][1] + z * mtx[2][1] + mtx[3][1];
+    vtx[2] = x * mtx[0][2] + y * mtx[1][2] + z * mtx[2][2] + mtx[3][2];
+    vtx[3] = x * mtx[0][3] + y * mtx[1][3] + z * mtx[2][3] + mtx[3][3];
+}
+
+static void gSPLightVertex_default(u32 v)
+{
+    TransformVectorNormalize( &OGL.triangles.vertices[v].nx, gSP.matrix.modelView[gSP.matrix.modelViewi] );
+
+    f32 r, g, b;
+    r = gSP.lights[gSP.numLights].r;
+    g = gSP.lights[gSP.numLights].g;
+    b = gSP.lights[gSP.numLights].b;
+    for (int i = 0; i < gSP.numLights; i++)
+    {
+        f32 intensity = DotProduct( &OGL.triangles.vertices[v].nx, &gSP.lights[i].x );
+        if (intensity < 0.0f) intensity = 0.0f;
+        r += gSP.lights[i].r * intensity;
+        g += gSP.lights[i].g * intensity;
+        b += gSP.lights[i].b * intensity;
+    }
+    OGL.triangles.vertices[v].r = min(1.0, r);
+    OGL.triangles.vertices[v].g = min(1.0, g);
+    OGL.triangles.vertices[v].b = min(1.0, b);
+}
+
+// Billboards vertex v by adding the x/y/z/w of anchor vertex i to it.
+static void gSPBillboardVertex_default(u32 v, u32 i)
+{
+    // Both are accessed through pointers, so v == i behaves exactly like in-place
+    // indexing would.
+    SPVertex *dst = &OGL.triangles.vertices[v];
+    const SPVertex *anchor = &OGL.triangles.vertices[i];
+
+    dst->x += anchor->x;
+    dst->y += anchor->y;
+    dst->z += anchor->z;
+    dst->w += anchor->w;
+}
+
+// Recomputes gSP.matrix.combined from the projection matrix and the current modelview
+// matrix (the stack entry at modelViewi) via MultMatrix(), then clears CHANGED_MATRIX
+// so the vertex paths do not recombine until a matrix changes again.
+void gSPCombineMatrices()
+{
+    MultMatrix(gSP.matrix.projection, gSP.matrix.modelView[gSP.matrix.modelViewi], gSP.matrix.combined);
+    gSP.changed &= ~CHANGED_MATRIX;
+}
+
+void gSPProcessVertex( u32 v )
+{
+    f32 intensity;
+    f32 r, g, b;
+
+    if (gSP.changed & CHANGED_MATRIX)
+        gSPCombineMatrices();
+
+    gSPTransformVertex( &OGL.triangles.vertices[v].x, gSP.matrix.combined );
+
+    if (config.screen.flipVertical)
+    {
+        OGL.triangles.vertices[v].y = -OGL.triangles.vertices[v].y;
+    }
+
+    if (gDP.otherMode.depthSource)
+    {
+        OGL.triangles.vertices[v].z = gDP.primDepth.z * OGL.triangles.vertices[v].w;
+    }
+
+    if (gSP.matrix.billboard)
+    {
+        int i = 0;
+#ifdef __TRIBUFFER_OPT
+        i = OGL.triangles.indexmap[0];
+#endif
+
+        gSPBillboardVertex(v, i);
+    }
+
+    if (!(gSP.geometryMode & G_ZBUFFER))
+    {
+        OGL.triangles.vertices[v].z = -OGL.triangles.vertices[v].w;
+    }
+
+    if (config.enableClipping)
+        gSPClipVertex(v);
+
+    if (gSP.geometryMode & G_LIGHTING)
+    {
+        if (config.enableLighting)
+        {
+            gSPLightVertex(v);
+        }
+        else
+        {
+            OGL.triangles.vertices[v].r = 1.0f;
+            OGL.triangles.vertices[v].g = 1.0f;
+            OGL.triangles.vertices[v].b = 1.0f;
+        }
+
+        if (gSP.geometryMode & G_TEXTURE_GEN)
+        {
+            TransformVectorNormalize(&OGL.triangles.vertices[v].nx, gSP.matrix.projection);
+
+            if (gSP.geometryMode & G_TEXTURE_GEN_LINEAR)
+            {
+                OGL.triangles.vertices[v].s = acosf(OGL.triangles.vertices[v].nx) * 325.94931f;
+                OGL.triangles.vertices[v].t = acosf(OGL.triangles.vertices[v].ny) * 325.94931f;
+            }
+            else // G_TEXTURE_GEN
+            {
+                OGL.triangles.vertices[v].s = (OGL.triangles.vertices[v].nx + 1.0f) * 512.0f;
+                OGL.triangles.vertices[v].t = (OGL.triangles.vertices[v].ny + 1.0f) * 512.0f;
+            }
+        }
+    }
+}
+
+
+void gSPLoadUcodeEx( u32 uc_start, u32 uc_dstart, u16 uc_dsize )
+{
+    RSP.PCi = 0;
+    gSP.matrix.modelViewi = 0;
+    gSP.changed |= CHANGED_MATRIX;
+    gSP.status[0] = gSP.status[1] = gSP.status[2] = gSP.status[3] = 0;
+
+    if ((((uc_start & 0x1FFFFFFF) + 4096) > RDRAMSize) || (((uc_dstart & 0x1FFFFFFF) + uc_dsize) > RDRAMSize))
+    {
+        return;
+    }
+
+    MicrocodeInfo *ucode = GBI_DetectMicrocode( uc_start, uc_dstart, uc_dsize );
+
+    if (ucode->type != 0xFFFFFFFF)
+        last_good_ucode = ucode->type;
+
+    if (ucode->type != NONE)
+    {
+        GBI_MakeCurrent( ucode );
+    }
+    else
+    {
+        LOG(LOG_WARNING, "Unknown Ucode\n");
+    }
+}
+
+// No-op command: its only effect here is to call gSPFlushTriangles().
+void gSPNoOp()
+{
+    gSPFlushTriangles();
+}
+
+// Handler for an unrecognized triangle command: nothing is drawn. When __TRIBUFFER_OPT
+// is enabled it calls gSPFlushTriangles(); otherwise it does nothing.
+void gSPTriangleUnknown()
+{
+#ifdef __TRIBUFFER_OPT
+    gSPFlushTriangles();
+#endif
+}
+
+void gSPMatrix( u32 matrix, u8 param )
+{
+#ifdef __TRIBUFFER_OPT
+    gSPFlushTriangles();
+#endif
+
+    f32 mtx[4][4];
+    u32 address = RSP_SegmentToPhysical( matrix );
+
+    if (address + 64 > RDRAMSize)
+    {
+        return;
+    }
+
+    RSP_LoadMatrix( mtx, address );
+
+    if (param & G_MTX_PROJECTION)
+    {
+        if (param & G_MTX_LOAD)
+            CopyMatrix( gSP.matrix.projection, mtx );
+        else
+            MultMatrix2( gSP.matrix.projection, mtx );
+    }
+    else
+    {
+        if ((param & G_MTX_PUSH) && (gSP.matrix.modelViewi < (gSP.matrix.stackSize - 1)))
+        {
+            CopyMatrix( gSP.matrix.modelView[gSP.matrix.modelViewi + 1], gSP.matrix.modelView[gSP.matrix.modelViewi] );
+            gSP.matrix.modelViewi++;
+        }
+        if (param & G_MTX_LOAD)
+            CopyMatrix( gSP.matrix.modelView[gSP.matrix.modelViewi], mtx );
+        else
+            MultMatrix2( gSP.matrix.modelView[gSP.matrix.modelViewi], mtx );
+    }
+
+    gSP.changed |= CHANGED_MATRIX;
+}
+
+void gSPDMAMatrix( u32 matrix, u8 index, u8 multiply )
+{
+    f32 mtx[4][4];
+    u32 address = gSP.DMAOffsets.mtx + RSP_SegmentToPhysical( matrix );
+
+    if (address + 64 > RDRAMSize)
+    {
+        return;
+    }
+
+    RSP_LoadMatrix( mtx, address );
+
+    gSP.matrix.modelViewi = index;
+
+    if (multiply)
+    {
+        //CopyMatrix( gSP.matrix.modelView[gSP.matrix.modelViewi], gSP.matrix.modelView[0] );
+        //MultMatrix( gSP.matrix.modelView[gSP.matrix.modelViewi], mtx );
+        MultMatrix(gSP.matrix.modelView[0], mtx, gSP.matrix.modelView[gSP.matrix.modelViewi]);
+    }
+    else
+        CopyMatrix( gSP.matrix.modelView[gSP.matrix.modelViewi], mtx );
+
+    CopyMatrix( gSP.matrix.projection, identityMatrix );
+    gSP.changed |= CHANGED_MATRIX;
+}
+
+void gSPViewport( u32 v )
+{
+    u32 address = RSP_SegmentToPhysical( v );
+
+    if ((address + 16) > RDRAMSize)
+    {
+#ifdef DEBUG
+        DebugMsg( DEBUG_HIGH | DEBUG_ERROR, "// Attempting to load viewport from invalid address\n" );
+        DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gSPViewport( 0x%08X );\n", v );
+#endif
+        return;
+    }
+
+    gSP.viewport.vscale[0] = _FIXED2FLOAT( *(s16*)&RDRAM[address +  2], 2 );
+    gSP.viewport.vscale[1] = _FIXED2FLOAT( *(s16*)&RDRAM[address     ], 2 );
+    gSP.viewport.vscale[2] = _FIXED2FLOAT( *(s16*)&RDRAM[address +  6], 10 );// * 0.00097847357f;
+    gSP.viewport.vscale[3] = *(s16*)&RDRAM[address +  4];
+    gSP.viewport.vtrans[0] = _FIXED2FLOAT( *(s16*)&RDRAM[address + 10], 2 );
+    gSP.viewport.vtrans[1] = _FIXED2FLOAT( *(s16*)&RDRAM[address +  8], 2 );
+    gSP.viewport.vtrans[2] = _FIXED2FLOAT( *(s16*)&RDRAM[address + 14], 10 );// * 0.00097847357f;
+    gSP.viewport.vtrans[3] = *(s16*)&RDRAM[address + 12];
+
+    gSP.viewport.x      = gSP.viewport.vtrans[0] - gSP.viewport.vscale[0];
+    gSP.viewport.y      = gSP.viewport.vtrans[1] - gSP.viewport.vscale[1];
+    gSP.viewport.width  = gSP.viewport.vscale[0] * 2;
+    gSP.viewport.height = gSP.viewport.vscale[1] * 2;
+    gSP.viewport.nearz  = gSP.viewport.vtrans[2] - gSP.viewport.vscale[2];
+    gSP.viewport.farz   = (gSP.viewport.vtrans[2] + gSP.viewport.vscale[2]) ;
+
+    gSP.changed |= CHANGED_VIEWPORT;
+}
+
+// Loads a 4x4 matrix from the segmented address mptr straight into the combined matrix
+// and clears CHANGED_MATRIX, so it is not rebuilt from projection * modelview until one
+// of those changes again. The command is ignored if the 64-byte matrix would extend past
+// the end of RDRAM.
+void gSPForceMatrix( u32 mptr )
+{
+    u32 address = RSP_SegmentToPhysical( mptr );
+
+    if (address + 64 > RDRAMSize)
+    {
+        return;
+    }
+
+    // Load from the address that was just bounds-checked instead of translating
+    // mptr a second time.
+    RSP_LoadMatrix( gSP.matrix.combined, address );
+
+    gSP.changed &= ~CHANGED_MATRIX;
+}
+
+void gSPLight( u32 l, s32 n )
+{
+    n--;
+    if (n >= 8)
+        return;
+
+    u32 address = RSP_SegmentToPhysical( l );
+
+    if ((address + sizeof( Light )) > RDRAMSize)
+    {
+        return;
+    }
+
+    u8 *addr = &RDRAM[address];
+
+    if (config.hackZelda && (addr[0] == 0x08) && (addr[4] == 0xFF))
+    {
+        LightMM *light = (LightMM*)addr;
+        gSP.lights[n].r = light->r * 0.0039215689f;
+        gSP.lights[n].g = light->g * 0.0039215689f;
+        gSP.lights[n].b = light->b * 0.0039215689f;
+        gSP.lights[n].x = light->x;
+        gSP.lights[n].y = light->y;
+        gSP.lights[n].z = light->z;
+    }
+    else
+    {
+        Light *light = (Light*)addr;
+        gSP.lights[n].r = light->r * 0.0039215689f;
+        gSP.lights[n].g = light->g * 0.0039215689f;
+        gSP.lights[n].b = light->b * 0.0039215689f;
+        gSP.lights[n].x = light->x;
+        gSP.lights[n].y = light->y;
+        gSP.lights[n].z = light->z;
+    }
+    Normalize(&gSP.lights[n].x);
+}
+
+// Look-at command: not implemented; the argument is ignored.
+void gSPLookAt( u32 l )
+{
+}
+/*
+ * Copies n vertices from the segmented RDRAM address v into
+ * OGL.triangles.vertices, starting at vertex-buffer slot v0, and runs
+ * gSPProcessVertex on each one (gSPProcessVertex4 on groups of four when
+ * __VEC4_OPT is defined). Nothing is loaded if the source range extends
+ * past RDRAMSize; an error is logged if v0 + n exceeds INDEXMAP_SIZE.
+ * With __TRIBUFFER_OPT the destination slot comes from
+ * __indexmap_getnew().
+ */
+void gSPVertex( u32 v, u32 n, u32 v0 )
+{
+    //flush batched triangles:
+#ifdef __TRIBUFFER_OPT
+    gSPFlushTriangles();
+#endif
+
+    u32 address = RSP_SegmentToPhysical( v );
+
+    if ((address + sizeof( Vertex ) * n) > RDRAMSize) // whole source range must be inside RDRAM
+    {
+        return;
+    }
+
+    Vertex *vertex = (Vertex*)&RDRAM[address];
+
+    if ((n + v0) <= INDEXMAP_SIZE)
+    {
+        unsigned int i = v0;
+#ifdef __VEC4_OPT
+        for (; i < n - (n%4) + v0; i += 4) // groups of four; the remainder is handled by the loop below
+        {
+            u32 v = i; // shadows the parameter v: from here on v is a vertex-buffer slot
+#ifdef __TRIBUFFER_OPT
+            v = __indexmap_getnew(v, 4);
+#endif
+            for(int j = 0; j < 4; j++)
+            {
+                OGL.triangles.vertices[v+j].x = vertex->x;
+                OGL.triangles.vertices[v+j].y = vertex->y;
+                OGL.triangles.vertices[v+j].z = vertex->z;
+                //OGL.triangles.vertices[i+j].flag = vertex->flag;
+                OGL.triangles.vertices[v+j].s = _FIXED2FLOAT( vertex->s, 5 );
+                OGL.triangles.vertices[v+j].t = _FIXED2FLOAT( vertex->t, 5 );
+                if (gSP.geometryMode & G_LIGHTING) // lighting on: store normal + alpha only
+                {
+                    OGL.triangles.vertices[v+j].nx = vertex->normal.x;
+                    OGL.triangles.vertices[v+j].ny = vertex->normal.y;
+                    OGL.triangles.vertices[v+j].nz = vertex->normal.z;
+                    OGL.triangles.vertices[v+j].a = vertex->color.a * 0.0039215689f;
+                }
+                else // lighting off: store RGBA scaled by 1/255
+                {
+                    OGL.triangles.vertices[v+j].r = vertex->color.r * 0.0039215689f;
+                    OGL.triangles.vertices[v+j].g = vertex->color.g * 0.0039215689f;
+                    OGL.triangles.vertices[v+j].b = vertex->color.b * 0.0039215689f;
+                    OGL.triangles.vertices[v+j].a = vertex->color.a * 0.0039215689f;
+                }
+                vertex++;
+            }
+            gSPProcessVertex4(v);
+        }
+#endif
+        for (; i < n + v0; i++) // one vertex at a time
+        {
+            u32 v = i; // shadows the parameter v (vertex-buffer slot)
+#ifdef __TRIBUFFER_OPT
+            v = __indexmap_getnew(v, 1);
+#endif
+            OGL.triangles.vertices[v].x = vertex->x;
+            OGL.triangles.vertices[v].y = vertex->y;
+            OGL.triangles.vertices[v].z = vertex->z;
+            OGL.triangles.vertices[v].s = _FIXED2FLOAT( vertex->s, 5 );
+            OGL.triangles.vertices[v].t = _FIXED2FLOAT( vertex->t, 5 );
+            if (gSP.geometryMode & G_LIGHTING) // lighting on: store normal + alpha only
+            {
+                OGL.triangles.vertices[v].nx = vertex->normal.x;
+                OGL.triangles.vertices[v].ny = vertex->normal.y;
+                OGL.triangles.vertices[v].nz = vertex->normal.z;
+                OGL.triangles.vertices[v].a = vertex->color.a * 0.0039215689f;
+            }
+            else // lighting off: store RGBA scaled by 1/255
+            {
+                OGL.triangles.vertices[v].r = vertex->color.r * 0.0039215689f;
+                OGL.triangles.vertices[v].g = vertex->color.g * 0.0039215689f;
+                OGL.triangles.vertices[v].b = vertex->color.b * 0.0039215689f;
+                OGL.triangles.vertices[v].a = vertex->color.a * 0.0039215689f;
+            }
+            gSPProcessVertex(v);
+            vertex++;
+        }
+    }
+    else
+    {
+        LOG(LOG_ERROR, "Using Vertex outside buffer v0=%i, n=%i\n", v0, n);
+    }
+
+}
+/*
+ * Same job as gSPVertex, but for PDVertex records: position and s/t come
+ * from the record, while the color (or normal + alpha when G_LIGHTING is
+ * set) is read from RDRAM at gSP.vertexColorBase + (ci & 0xff).
+ * Nothing is loaded if the vertex range extends past RDRAMSize; an error
+ * is logged if v0 + n exceeds INDEXMAP_SIZE.
+ *
+ * NOTE(review): the color lookup address is not checked against
+ * RDRAMSize in this function - confirm it is validated elsewhere.
+ */
+void gSPCIVertex( u32 v, u32 n, u32 v0 )
+{
+
+#ifdef __TRIBUFFER_OPT
+    gSPFlushTriangles();
+#endif
+
+    u32 address = RSP_SegmentToPhysical( v );
+
+    if ((address + sizeof( PDVertex ) * n) > RDRAMSize) // whole source range must be inside RDRAM
+    {
+        return;
+    }
+
+    PDVertex *vertex = (PDVertex*)&RDRAM[address];
+
+    if ((n + v0) <= INDEXMAP_SIZE)
+    {
+        unsigned int i = v0;
+#ifdef __VEC4_OPT
+        for (; i < n - (n%4) + v0; i += 4) // groups of four; the remainder is handled by the loop below
+        {
+            u32 v = i; // shadows the parameter v (vertex-buffer slot)
+#ifdef __TRIBUFFER_OPT
+            v = __indexmap_getnew(v, 4);
+#endif
+            for(unsigned int j = 0; j < 4; j++)
+            {
+                OGL.triangles.vertices[v+j].x = vertex->x;
+                OGL.triangles.vertices[v+j].y = vertex->y;
+                OGL.triangles.vertices[v+j].z = vertex->z;
+                OGL.triangles.vertices[v+j].s = _FIXED2FLOAT( vertex->s, 5 );
+                OGL.triangles.vertices[v+j].t = _FIXED2FLOAT( vertex->t, 5 );
+                u8 *color = &RDRAM[gSP.vertexColorBase + (vertex->ci & 0xff)];
+                // The four bytes are consumed in reverse order: [3],[2],[1] then [0].
+                if (gSP.geometryMode & G_LIGHTING)
+                {
+                    OGL.triangles.vertices[v+j].nx = (s8)color[3];
+                    OGL.triangles.vertices[v+j].ny = (s8)color[2];
+                    OGL.triangles.vertices[v+j].nz = (s8)color[1];
+                    OGL.triangles.vertices[v+j].a = color[0] * 0.0039215689f;
+                }
+                else
+                {
+                    OGL.triangles.vertices[v+j].r = color[3] * 0.0039215689f;
+                    OGL.triangles.vertices[v+j].g = color[2] * 0.0039215689f;
+                    OGL.triangles.vertices[v+j].b = color[1] * 0.0039215689f;
+                    OGL.triangles.vertices[v+j].a = color[0] * 0.0039215689f;
+                }
+                vertex++;
+            }
+            gSPProcessVertex4(v);
+        }
+#endif
+        for(; i < n + v0; i++) // one vertex at a time
+        {
+            u32 v = i; // shadows the parameter v (vertex-buffer slot)
+#ifdef __TRIBUFFER_OPT
+            v = __indexmap_getnew(v, 1);
+#endif
+            OGL.triangles.vertices[v].x = vertex->x;
+            OGL.triangles.vertices[v].y = vertex->y;
+            OGL.triangles.vertices[v].z = vertex->z;
+            OGL.triangles.vertices[v].s = _FIXED2FLOAT( vertex->s, 5 );
+            OGL.triangles.vertices[v].t = _FIXED2FLOAT( vertex->t, 5 );
+            u8 *color = &RDRAM[gSP.vertexColorBase + (vertex->ci & 0xff)];
+            // The four bytes are consumed in reverse order: [3],[2],[1] then [0].
+            if (gSP.geometryMode & G_LIGHTING)
+            {
+                OGL.triangles.vertices[v].nx = (s8)color[3];
+                OGL.triangles.vertices[v].ny = (s8)color[2];
+                OGL.triangles.vertices[v].nz = (s8)color[1];
+                OGL.triangles.vertices[v].a = color[0] * 0.0039215689f;
+            }
+            else
+            {
+                OGL.triangles.vertices[v].r = color[3] * 0.0039215689f;
+                OGL.triangles.vertices[v].g = color[2] * 0.0039215689f;
+                OGL.triangles.vertices[v].b = color[1] * 0.0039215689f;
+                OGL.triangles.vertices[v].a = color[0] * 0.0039215689f;
+            }
+
+            gSPProcessVertex(v);
+            vertex++;
+        }
+    }
+    else
+    {
+        LOG(LOG_ERROR, "Using Vertex outside buffer v0=%i, n=%i\n", v0, n);
+    }
+
+}
+/*
+ * Loads n packed 10-byte vertices into OGL.triangles.vertices starting at
+ * slot v0. The source address is gSP.DMAOffsets.vtx plus the physical
+ * translation of v. Each record holds three s16 coordinates at byte
+ * offsets 0, 2 and 4, followed by four bytes at offsets 6..9 that are
+ * read as normal + alpha when G_LIGHTING is set and as RGBA otherwise.
+ * s/t are not written here. Nothing is loaded if the range extends past
+ * RDRAMSize; an error is logged if v0 + n exceeds INDEXMAP_SIZE.
+ *
+ * The "^ 2" / "^ 3" on the RDRAM indices presumably compensate for the
+ * byte order in which RDRAM is stored - TODO confirm.
+ */
+void gSPDMAVertex( u32 v, u32 n, u32 v0 )
+{
+
+    u32 address = gSP.DMAOffsets.vtx + RSP_SegmentToPhysical( v );
+
+    if ((address + 10 * n) > RDRAMSize) // 10 bytes per vertex
+    {
+        return;
+    }
+
+    if ((n + v0) <= INDEXMAP_SIZE)
+    {
+        u32 i = v0;
+#ifdef __VEC4_OPT
+        for (; i < n - (n%4) + v0; i += 4) // groups of four; the remainder is handled by the loop below
+        {
+            u32 v = i; // shadows the parameter v (vertex-buffer slot)
+#ifdef __TRIBUFFER_OPT
+            v = __indexmap_getnew(v, 4);
+#endif
+            for(int j = 0; j < 4; j++)
+            {
+                OGL.triangles.vertices[v+j].x = *(s16*)&RDRAM[address ^ 2];
+                OGL.triangles.vertices[v+j].y = *(s16*)&RDRAM[(address + 2) ^ 2];
+                OGL.triangles.vertices[v+j].z = *(s16*)&RDRAM[(address + 4) ^ 2];
+
+                if (gSP.geometryMode & G_LIGHTING)
+                {
+                    OGL.triangles.vertices[v+j].nx = *(s8*)&RDRAM[(address + 6) ^ 3];
+                    OGL.triangles.vertices[v+j].ny = *(s8*)&RDRAM[(address + 7) ^ 3];
+                    OGL.triangles.vertices[v+j].nz = *(s8*)&RDRAM[(address + 8) ^ 3];
+                    OGL.triangles.vertices[v+j].a = *(u8*)&RDRAM[(address + 9) ^ 3] * 0.0039215689f;
+                }
+                else
+                {
+                    OGL.triangles.vertices[v+j].r = *(u8*)&RDRAM[(address + 6) ^ 3] * 0.0039215689f;
+                    OGL.triangles.vertices[v+j].g = *(u8*)&RDRAM[(address + 7) ^ 3] * 0.0039215689f;
+                    OGL.triangles.vertices[v+j].b = *(u8*)&RDRAM[(address + 8) ^ 3] * 0.0039215689f;
+                    OGL.triangles.vertices[v+j].a = *(u8*)&RDRAM[(address + 9) ^ 3] * 0.0039215689f;
+                }
+                address += 10; // advance to the next packed vertex
+            }
+            gSPProcessVertex4(v);
+        }
+#endif
+        for (; i < n + v0; i++) // one vertex at a time
+        {
+            u32 v = i; // shadows the parameter v (vertex-buffer slot)
+#ifdef __TRIBUFFER_OPT
+            //int ind = OGL.triangles.indexmap[i];
+            v = __indexmap_getnew(v, 1);
+
+            //if previously mapped copy across s/t.
+            //if (ind != -1)
+            //{
+            //    SPVertex *vtx = &OGL.triangles.vertices[ind];
+            //    OGL.triangles.vertices[v].s = vtx->s;
+            //    OGL.triangles.vertices[v].t = vtx->s;
+            //}
+#else
+            v = i; // redundant with the initializer above
+#endif
+            OGL.triangles.vertices[v].x = *(s16*)&RDRAM[address ^ 2];
+            OGL.triangles.vertices[v].y = *(s16*)&RDRAM[(address + 2) ^ 2];
+            OGL.triangles.vertices[v].z = *(s16*)&RDRAM[(address + 4) ^ 2];
+
+            if (gSP.geometryMode & G_LIGHTING)
+            {
+                OGL.triangles.vertices[v].nx = *(s8*)&RDRAM[(address + 6) ^ 3];
+                OGL.triangles.vertices[v].ny = *(s8*)&RDRAM[(address + 7) ^ 3];
+                OGL.triangles.vertices[v].nz = *(s8*)&RDRAM[(address + 8) ^ 3];
+                OGL.triangles.vertices[v].a = *(u8*)&RDRAM[(address + 9) ^ 3] * 0.0039215689f;
+            }
+            else
+            {
+                OGL.triangles.vertices[v].r = *(u8*)&RDRAM[(address + 6) ^ 3] * 0.0039215689f;
+                OGL.triangles.vertices[v].g = *(u8*)&RDRAM[(address + 7) ^ 3] * 0.0039215689f;
+                OGL.triangles.vertices[v].b = *(u8*)&RDRAM[(address + 8) ^ 3] * 0.0039215689f;
+                OGL.triangles.vertices[v].a = *(u8*)&RDRAM[(address + 9) ^ 3] * 0.0039215689f;
+            }
+
+            gSPProcessVertex(v);
+            address += 10; // advance to the next packed vertex
+        }
+    }
+    else
+    {
+        LOG(LOG_ERROR, "Using Vertex outside buffer v0=%i, n=%i\n", v0, n);
+    }
+
+}
+
+void gSPDisplayList( u32 dl )
+{
+    u32 address = RSP_SegmentToPhysical( dl );
+
+    if ((address + 8) > RDRAMSize)
+    {
+        return;
+    }
+
+    if (RSP.PCi < (GBI.PCStackSize - 1))
+    {
+#ifdef DEBUG
+    DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "\n" );
+    DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gSPDisplayList( 0x%08X );\n",
+        dl );
+#endif
+        RSP.PCi++;
+        RSP.PC[RSP.PCi] = address;
+        RSP.nextCmd = _SHIFTR( *(u32*)&RDRAM[address], 24, 8 );
+    }
+
+
+}
+/*
+ * Executes up to n consecutive 8-byte display-list commands starting at
+ * dl by dispatching each one through GBI.cmd[], using the current PC
+ * stack slot as the cursor. The slot is set back to its previous value
+ * afterwards.
+ *
+ * NOTE(review): the initial range check and the loop condition use the
+ * raw dl value, while the cursor starts at RSP_SegmentToPhysical( dl ).
+ * The two only agree when that translation leaves dl unchanged - confirm.
+ * NOTE(review): RSP.nextCmd is read at the already-advanced PC, which the
+ * bounds check at the top of the loop has not covered yet.
+ */
+void gSPDMADisplayList( u32 dl, u32 n )
+{
+    if ((dl + (n << 3)) > RDRAMSize) // n << 3: eight bytes per command
+    {
+        return;
+    }
+
+    u32 curDL = RSP.PC[RSP.PCi]; // saved so the caller's position can be restored
+
+    RSP.PC[RSP.PCi] = RSP_SegmentToPhysical( dl );
+
+    while ((RSP.PC[RSP.PCi] - dl) < (n << 3))
+    {
+        if ((RSP.PC[RSP.PCi] + 8) > RDRAMSize)
+        {
+            break;
+        }
+
+        u32 w0 = *(u32*)&RDRAM[RSP.PC[RSP.PCi]];
+        u32 w1 = *(u32*)&RDRAM[RSP.PC[RSP.PCi] + 4];
+
+        RSP.PC[RSP.PCi] += 8; // advance before dispatch, so the handler sees the next command's address
+        RSP.nextCmd = _SHIFTR( *(u32*)&RDRAM[RSP.PC[RSP.PCi]], 24, 8 );
+
+        GBI.cmd[_SHIFTR( w0, 24, 8 )]( w0, w1 ); // top byte of w0 selects the handler
+    }
+
+    RSP.PC[RSP.PCi] = curDL;
+}
+
+// Branches to the display list at segmented address dl: the current PC
+// stack slot is overwritten (nothing is pushed) and RSP.nextCmd is set to
+// the opcode of the first command. Ignored when that command would lie
+// outside RDRAM.
+void gSPBranchList( u32 dl )
+{
+    const u32 target = RSP_SegmentToPhysical( dl );
+
+    if ((target + 8) <= RDRAMSize)
+    {
+        RSP.PC[RSP.PCi] = target;
+        RSP.nextCmd = _SHIFTR( *(u32*)&RDRAM[target], 24, 8 );
+    }
+}
+
+// Conditional branch: when the z of vertex vtx is less than or equal to
+// zval, the current PC stack slot is redirected to branchdl. Ignored when
+// the branch target's first command would lie outside RDRAM.
+void gSPBranchLessZ( u32 branchdl, u32 vtx, f32 zval )
+{
+    const u32 target = RSP_SegmentToPhysical( branchdl );
+
+    if ((target + 8) > RDRAMSize)
+        return;
+
+    const bool takeBranch = (OGL.triangles.vertices[vtx].z <= zval);
+    if (takeBranch)
+        RSP.PC[RSP.PCi] = target;
+}
+
+// Stores the matrix and vertex offsets in gSP.DMAOffsets.
+void gSPSetDMAOffsets( u32 mtxoffset, u32 vtxoffset )
+{
+    gSP.DMAOffsets.vtx = vtxoffset;
+    gSP.DMAOffsets.mtx = mtxoffset;
+}
+
+// Records the physical address of the vertex color table (used by
+// gSPCIVertex) and, with __TRIBUFFER_OPT, runs gSPFlushTriangles().
+void gSPSetVertexColorBase( u32 base )
+{
+    const u32 physical = RSP_SegmentToPhysical( base );
+    gSP.vertexColorBase = physical;
+
+#ifdef __TRIBUFFER_OPT
+    gSPFlushTriangles();
+#endif
+}
+
+// Stub: the base address is accepted but nothing is done with it.
+void gSPSprite2DBase( u32 base )
+{
+    (void)base;
+}
+
+// Copies position (x, y, z, w), color (r, g, b, a) and texture coordinates
+// (s, t) from src to dest. Other SPVertex fields are left untouched.
+void gSPCopyVertex( SPVertex *dest, SPVertex *src )
+{
+    // texture coordinates
+    dest->s = src->s;
+    dest->t = src->t;
+
+    // color
+    dest->r = src->r;
+    dest->g = src->g;
+    dest->b = src->b;
+    dest->a = src->a;
+
+    // position
+    dest->x = src->x;
+    dest->y = src->y;
+    dest->z = src->z;
+    dest->w = src->w;
+}
+
+void gSPInterpolateVertex( SPVertex *dest, f32 percent, SPVertex *first, SPVertex *second )
+{
+    dest->x = first->x + percent * (second->x - first->x);
+    dest->y = first->y + percent * (second->y - first->y);
+    dest->z = first->z + percent * (second->z - first->z);
+    dest->w = first->w + percent * (second->w - first->w);
+    dest->r = first->r + percent * (second->r - first->r);
+    dest->g = first->g + percent * (second->g - first->g);
+    dest->b = first->b + percent * (second->b - first->b);
+    dest->a = first->a + percent * (second->a - first->a);
+    dest->s = first->s + percent * (second->s - first->s);
+    dest->t = first->t + percent * (second->t - first->t);
+}
+/*
+ * Draws n DKRTriangle records read from the segmented RDRAM address tris.
+ * For each record the cull mode is derived from flag bit 0x40 and the
+ * sign of the viewport x scale, the per-triangle texture coordinates are
+ * copied into the three referenced vertices, and the triangle is passed
+ * to gSPTriangle. Nothing is drawn if the record range extends past
+ * RDRAMSize.
+ */
+void gSPDMATriangles( u32 tris, u32 n )
+{
+    u32 address = RSP_SegmentToPhysical( tris );
+
+    if (address + sizeof( DKRTriangle ) * n > RDRAMSize)
+    {
+        return;
+    }
+
+#ifdef __TRIBUFFER_OPT
+    __indexmap_undomap();
+#endif
+
+    DKRTriangle *triangles = (DKRTriangle*)&RDRAM[address];
+
+    for (u32 i = 0; i < n; i++)
+    {
+        int mode = 0; // stays 0 (no culling) when flag bit 0x40 is set
+        if (!(triangles->flag & 0x40))
+        {
+            if (gSP.viewport.vscale[0] > 0)
+                mode |= G_CULL_BACK;
+            else
+                mode |= G_CULL_FRONT;
+        }
+
+        if ((gSP.geometryMode&G_CULL_BOTH) != mode)
+        {
+            OGL_DrawTriangles(); // draw what is queued before the cull mode changes
+            gSP.geometryMode &= ~G_CULL_BOTH;
+            gSP.geometryMode |= mode;
+            gSP.changed |= CHANGED_GEOMETRYMODE;
+        }
+
+
+        // Texture coordinates are stored per triangle and written into the vertices.
+        s32 v0 = triangles->v0;
+        s32 v1 = triangles->v1;
+        s32 v2 = triangles->v2;
+        OGL.triangles.vertices[v0].s = _FIXED2FLOAT( triangles->s0, 5 );
+        OGL.triangles.vertices[v0].t = _FIXED2FLOAT( triangles->t0, 5 );
+        OGL.triangles.vertices[v1].s = _FIXED2FLOAT( triangles->s1, 5 );
+        OGL.triangles.vertices[v1].t = _FIXED2FLOAT( triangles->t1, 5 );
+        OGL.triangles.vertices[v2].s = _FIXED2FLOAT( triangles->s2, 5 );
+        OGL.triangles.vertices[v2].t = _FIXED2FLOAT( triangles->t2, 5 );
+        gSPTriangle(triangles->v0, triangles->v1, triangles->v2);
+        triangles++;
+    }
+
+#ifdef __TRIBUFFER_OPT
+    OGL_DrawTriangles();
+#endif
+}
+
+// Draws the quad v0-v1-v2-v3 as two triangles sharing the v0-v2 diagonal,
+// then runs gSPFlushTriangles().
+void gSP1Quadrangle( s32 v0, s32 v1, s32 v2, s32 v3)
+{
+    gSPTriangle( v0, v1, v2 );    // first half
+    gSPTriangle( v0, v2, v3 );    // second half
+
+    gSPFlushTriangles();
+}
+
+bool gSPCullVertices( u32 v0, u32 vn )
+{
+    if (!config.enableClipping)
+        return FALSE;
+
+    s32 v = v0;
+#ifdef __TRIBUFFER_OPT
+    v = OGL.triangles.indexmap[v0];
+#endif
+
+    u32 clip = OGL.triangles.vertices[v].clip;
+    if (clip == 0)
+        return FALSE;
+
+    for (unsigned int i = (v0+1); i <= vn; i++)
+    {
+        v = i;
+#ifdef __TRIBUFFER_OPT
+        v = OGL.triangles.indexmap[i];
+#endif
+        if (OGL.triangles.vertices[v].clip != clip) return FALSE;
+    }
+    return TRUE;
+}
+
+// Ends the current display list early when gSPCullVertices( v0, vn )
+// reports that the vertex range can be culled: pops one level of the PC
+// stack, or halts the RSP when already at the bottom level.
+void gSPCullDisplayList( u32 v0, u32 vn )
+{
+    if (!gSPCullVertices( v0, vn ))
+        return;
+
+    if (RSP.PCi > 0)
+        RSP.PCi--;
+    else
+        RSP.halt = TRUE;
+}
+
+// Pops num entries from the modelview matrix stack and marks the matrix
+// state dirty. Nothing happens unless the stack index is greater than
+// num - 1. param is not used.
+void gSPPopMatrixN( u32 param, u32 num )
+{
+    if (!(gSP.matrix.modelViewi > num - 1))
+        return;
+
+    gSP.matrix.modelViewi -= num;
+    gSP.changed |= CHANGED_MATRIX;
+}
+
+// Pops one entry from the modelview matrix stack and marks the matrix
+// state dirty. Nothing happens when the stack index is already 0.
+// param is not used.
+void gSPPopMatrix( u32 param )
+{
+    if (gSP.matrix.modelViewi <= 0)
+        return;
+
+    gSP.matrix.modelViewi--;
+    gSP.changed |= CHANGED_MATRIX;
+}
+
+// Sets the base address of segment seg. Requests with a segment number
+// outside 0..0xF or a base at or beyond RDRAMSize are ignored.
+void gSPSegment( s32 seg, s32 base )
+{
+    // seg is signed: a negative value would index before gSP.segment[0],
+    // so reject it together with values above 0xF.
+    if (seg < 0 || seg > 0xF)
+    {
+        return;
+    }
+
+    // The cast makes a negative base compare as a huge value and be rejected.
+    if ((unsigned int)base > RDRAMSize - 1)
+    {
+        return;
+    }
+
+    gSP.segment[seg] = base;
+}
+
+// Stub: the clip ratio is accepted but nothing is done with it.
+void gSPClipRatio( u32 r )
+{
+    (void)r;
+}
+
+void gSPInsertMatrix( u32 where, u32 num )
+{
+    f32 fraction, integer;
+
+    if (gSP.changed & CHANGED_MATRIX)
+        gSPCombineMatrices();
+
+    if ((where & 0x3) || (where > 0x3C))
+    {
+        return;
+    }
+
+    if (where < 0x20)
+    {
+        fraction = modff( gSP.matrix.combined[0][where >> 1], &integer );
+        gSP.matrix.combined[0][where >> 1] = (s16)_SHIFTR( num, 16, 16 ) + abs( (int)fraction );
+
+        fraction = modff( gSP.matrix.combined[0][(where >> 1) + 1], &integer );
+        gSP.matrix.combined[0][(where >> 1) + 1] = (s16)_SHIFTR( num, 0, 16 ) + abs( (int)fraction );
+    }
+    else
+    {
+        f32 newValue;
+
+        fraction = modff( gSP.matrix.combined[0][(where - 0x20) >> 1], &integer );
+        newValue = integer + _FIXED2FLOAT( _SHIFTR( num, 16, 16 ), 16);
+
+        // Make sure the sign isn't lost
+        if ((integer == 0.0f) && (fraction != 0.0f))
+            newValue = newValue * (fraction / abs( (int)fraction ));
+
+        gSP.matrix.combined[0][(where - 0x20) >> 1] = newValue;
+
+        fraction = modff( gSP.matrix.combined[0][((where - 0x20) >> 1) + 1], &integer );
+        newValue = integer + _FIXED2FLOAT( _SHIFTR( num, 0, 16 ), 16 );
+
+        // Make sure the sign isn't lost
+        if ((integer == 0.0f) && (fraction != 0.0f))
+            newValue = newValue * (fraction / abs( (int)fraction ));
+
+        gSP.matrix.combined[0][((where - 0x20) >> 1) + 1] = newValue;
+    }
+}
+
+void gSPModifyVertex( u32 vtx, u32 where, u32 val )
+{
+    s32 v = vtx;
+
+#ifdef __TRIBUFFER_OPT
+    v = OGL.triangles.indexmap[v];
+#endif
+
+    switch (where)
+    {
+        case G_MWO_POINT_RGBA:
+            OGL.triangles.vertices[v].r = _SHIFTR( val, 24, 8 ) * 0.0039215689f;
+            OGL.triangles.vertices[v].g = _SHIFTR( val, 16, 8 ) * 0.0039215689f;
+            OGL.triangles.vertices[v].b = _SHIFTR( val, 8, 8 ) * 0.0039215689f;
+            OGL.triangles.vertices[v].a = _SHIFTR( val, 0, 8 ) * 0.0039215689f;
+            break;
+        case G_MWO_POINT_ST:
+            OGL.triangles.vertices[v].s = _FIXED2FLOAT( (s16)_SHIFTR( val, 16, 16 ), 5 );
+            OGL.triangles.vertices[v].t = _FIXED2FLOAT( (s16)_SHIFTR( val, 0, 16 ), 5 );
+            break;
+        case G_MWO_POINT_XYSCREEN:
+            break;
+        case G_MWO_POINT_ZSCREEN:
+            break;
+    }
+}
+
+// Sets the number of active lights. Values outside 0..8 select 0 lights.
+void gSPNumLights( s32 n )
+{
+    // n is signed: a negative count must fall back to 0 like any other
+    // out-of-range value instead of being stored as-is.
+    gSP.numLights = (n >= 0 && n <= 8) ? n : 0;
+}
+
+
+void gSPLightColor( u32 lightNum, u32 packedColor )
+{
+    lightNum--;
+
+    if (lightNum < 8)
+    {
+        gSP.lights[lightNum].r = _SHIFTR( packedColor, 24, 8 ) * 0.0039215689f;
+        gSP.lights[lightNum].g = _SHIFTR( packedColor, 16, 8 ) * 0.0039215689f;
+        gSP.lights[lightNum].b = _SHIFTR( packedColor, 8, 8 ) * 0.0039215689f;
+    }
+}
+
+// Stores the fog multiplier fm and offset fo and flags the fog position
+// as changed.
+void gSPFogFactor( s16 fm, s16 fo )
+{
+    gSP.fog.offset = fo;
+    gSP.fog.multiplier = fm;
+
+    gSP.changed |= CHANGED_FOGPOSITION;
+}
+
+// Stub: the scale is accepted but nothing is done with it.
+void gSPPerspNormalize( u16 scale )
+{
+    (void)scale;
+}
+
+void gSPTexture( f32 sc, f32 tc, s32 level, s32 tile, s32 on )
+{
+    gSP.texture.scales = sc;
+    gSP.texture.scalet = tc;
+
+    if (gSP.texture.scales == 0.0f) gSP.texture.scales = 1.0f;
+    if (gSP.texture.scalet == 0.0f) gSP.texture.scalet = 1.0f;
+
+    gSP.texture.level = level;
+    gSP.texture.on = on;
+
+    if (gSP.texture.tile != tile)
+    {
+        gSP.texture.tile = tile;
+        gSP.textureTile[0] = &gDP.tiles[tile];
+        gSP.textureTile[1] = &gDP.tiles[(tile < 7) ? (tile + 1) : tile];
+        gSP.changed |= CHANGED_TEXTURE;
+    }
+
+    gSP.changed |= CHANGED_TEXTURESCALE;
+}
+
+// Returns from the current display list: pops one level of the PC stack,
+// or halts the RSP when already at the bottom level. With __TRIBUFFER_OPT
+// the next opcode is re-read at the resulting PC and gSPFlushTriangles()
+// is run.
+void gSPEndDisplayList()
+{
+    if (RSP.PCi <= 0)
+        RSP.halt = TRUE;
+    else
+        RSP.PCi--;
+
+#ifdef __TRIBUFFER_OPT
+    RSP.nextCmd = _SHIFTR( *(u32*)&RDRAM[RSP.PC[RSP.PCi]], 24, 8 );
+    gSPFlushTriangles();
+#endif
+}
+
+// Clears the bits in clear, then sets the bits in set, in the geometry
+// mode word, and flags the geometry mode as changed.
+void gSPGeometryMode( u32 clear, u32 set )
+{
+    gSP.geometryMode &= ~clear;
+    gSP.geometryMode |= set;
+
+    gSP.changed |= CHANGED_GEOMETRYMODE;
+}
+
+// Sets the bits in mode in the geometry mode word and flags the geometry
+// mode as changed.
+void gSPSetGeometryMode( u32 mode )
+{
+    gSP.geometryMode = gSP.geometryMode | mode;
+    gSP.changed = gSP.changed | CHANGED_GEOMETRYMODE;
+}
+
+// Clears the bits in mode in the geometry mode word and flags the geometry
+// mode as changed.
+void gSPClearGeometryMode( u32 mode )
+{
+    gSP.geometryMode = gSP.geometryMode & ~mode;
+    gSP.changed = gSP.changed | CHANGED_GEOMETRYMODE;
+}
+
+// Draws a line between vertices v0 and v1 with a fixed width of 1.5.
+// flag is not used.
+void gSPLine3D( s32 v0, s32 v1, s32 flag )
+{
+    const f32 lineWidth = 1.5f;
+
+    OGL_DrawLine( v0, v1, lineWidth );
+}
+
+// Draws a line between vertices v0 and v1 whose width is 1.5 plus half of
+// wd. flag is not used.
+void gSPLineW3D( s32 v0, s32 v1, s32 wd, s32 flag )
+{
+    const f32 lineWidth = 1.5f + wd * 0.5f;
+
+    OGL_DrawLine( v0, v1, lineWidth );
+}
+/*
+ * Draws a scaled background image described by the record at segmented
+ * address bg. The record fields are fetched with raw 16-bit indexing into
+ * RDRAM (the "#if 1" variant); the "#else" variant reads the same data
+ * through a uObjScaleBg struct and is compiled out.
+ *
+ * The function sets gSP.bgImage, switches to TEXTUREMODE_BGIMAGE and
+ * 1-cycle mode, then issues texture rectangles: the main frame, an extra
+ * one to the right and/or below when the remaining image is smaller than
+ * the frame, and finally a fixed 0,0-319,239 rectangle.
+ *
+ * NOTE(review): no RDRAM bounds check is done on the record address, and
+ * scaleW/scaleH are used as divisors without a zero check - confirm.
+ * NOTE(review): the last gDPTextureRectangle ignores the frame values -
+ * confirm that the full-screen draw is intended.
+ */
+void gSPBgRect1Cyc( u32 bg )
+{
+
+#if 1
+
+    u32 addr = RSP_SegmentToPhysical(bg) >> 1; // index in 16-bit units
+
+    f32 imageX = (((u16*)RDRAM)[(addr+0)^1] >> 5);     // 0
+    f32 imageY = (((u16*)RDRAM)[(addr+4)^1] >> 5);     // 4
+    f32 imageW = (((u16*)RDRAM)[(addr+1)^1] >> 2);     // 1
+    f32 imageH = (((u16*)RDRAM)[(addr+5)^1] >> 2);     // 5
+
+    f32 frameX = ((s16*)RDRAM)[(addr+2)^1] / 4.0f;     // 2
+    f32 frameY = ((s16*)RDRAM)[(addr+6)^1] / 4.0f;     // 6
+    f32 frameW = ((u16*)RDRAM)[(addr+3)^1] >> 2;               // 3
+    f32 frameH = ((u16*)RDRAM)[(addr+7)^1] >> 2;               // 7
+
+
+    //wxUint16 imageFlip = ((u16*)gfx.RDRAM)[(addr+13)^1];     // 13;
+    //d.flipX  = (u8)imageFlip&0x01;
+
+    gSP.bgImage.address        = RSP_SegmentToPhysical(((u32*)RDRAM)[(addr+8)>>1]);    // 8,9
+    gSP.bgImage.width = imageW;
+    gSP.bgImage.height = imageH;
+    gSP.bgImage.format = ((u8*)RDRAM)[(((addr+11)<<1)+0)^3];
+    gSP.bgImage.size = ((u8*)RDRAM)[(((addr+11)<<1)+1)^3];
+    gSP.bgImage.palette = ((u16*)RDRAM)[(addr+12)^1];
+
+    f32 scaleW = ((s16*)RDRAM)[(addr+14)^1] / 1024.0f; // 14
+    f32 scaleH = ((s16*)RDRAM)[(addr+15)^1] / 1024.0f; // 15
+    gDP.textureMode = TEXTUREMODE_BGIMAGE;
+
+#else
+    u32 address = RSP_SegmentToPhysical( bg );
+    uObjScaleBg *objScaleBg = (uObjScaleBg*)&RDRAM[address];
+
+    gSP.bgImage.address = RSP_SegmentToPhysical( objScaleBg->imagePtr );
+    gSP.bgImage.width = objScaleBg->imageW >> 2;
+    gSP.bgImage.height = objScaleBg->imageH >> 2;
+    gSP.bgImage.format = objScaleBg->imageFmt;
+    gSP.bgImage.size = objScaleBg->imageSiz;
+    gSP.bgImage.palette = objScaleBg->imagePal;
+    gDP.textureMode = TEXTUREMODE_BGIMAGE;
+
+    f32 imageX = _FIXED2FLOAT( objScaleBg->imageX, 5 );
+    f32 imageY = _FIXED2FLOAT( objScaleBg->imageY, 5 );
+    f32 imageW = objScaleBg->imageW >> 2;
+    f32 imageH = objScaleBg->imageH >> 2;
+
+    f32 frameX = _FIXED2FLOAT( objScaleBg->frameX, 2 );
+    f32 frameY = _FIXED2FLOAT( objScaleBg->frameY, 2 );
+    f32 frameW = _FIXED2FLOAT( objScaleBg->frameW, 2 );
+    f32 frameH = _FIXED2FLOAT( objScaleBg->frameH, 2 );
+    f32 scaleW = _FIXED2FLOAT( objScaleBg->scaleW, 10 );
+    f32 scaleH = _FIXED2FLOAT( objScaleBg->scaleH, 10 );
+#endif
+
+    f32 frameX0 = frameX;
+    f32 frameY0 = frameY;
+    f32 frameS0 = imageX;
+    f32 frameT0 = imageY;
+
+    // Right/bottom edge of the main rectangle: limited by whichever is smaller,
+    // the remaining image (scaled) or the frame size.
+    f32 frameX1 = frameX + min( (imageW - imageX) / scaleW, frameW );
+    f32 frameY1 = frameY + min( (imageH - imageY) / scaleH, frameH );
+    //f32 frameS1 = imageX + min( (imageW - imageX) * scaleW, frameW * scaleW );
+    //f32 frameT1 = imageY + min( (imageH - imageY) * scaleH, frameH * scaleH );
+
+    gDP.otherMode.cycleType = G_CYC_1CYCLE;
+    gDP.changed |= CHANGED_CYCLETYPE;
+    gSPTexture( 1.0f, 1.0f, 0, 0, TRUE );
+    gDPTextureRectangle( frameX0, frameY0, frameX1 - 1, frameY1 - 1, 0, frameS0 - 1, frameT0 - 1, scaleW, scaleH );
+
+    if ((frameX1 - frameX0) < frameW) // frame is wider than what was drawn: fill the rest from s = 0
+    {
+        f32 frameX2 = frameW - (frameX1 - frameX0) + frameX1;
+        gDPTextureRectangle( frameX1, frameY0, frameX2 - 1, frameY1 - 1, 0, 0, frameT0, scaleW, scaleH );
+    }
+
+    if ((frameY1 - frameY0) < frameH) // frame is taller than what was drawn: fill the rest from t = 0
+    {
+        f32 frameY2 = frameH - (frameY1 - frameY0) + frameY1;
+        gDPTextureRectangle( frameX0, frameY1, frameX1 - 1, frameY2 - 1, 0, frameS0, 0, scaleW, scaleH );
+    }
+
+    gDPTextureRectangle( 0, 0, 319, 239, 0, 0, 0, scaleW, scaleH );
+}
+/*
+ * Currently disabled: the function returns immediately, so none of the
+ * code after the return runs. The unreachable body reads a uObjBg from
+ * the segmented address bg, fills in gSP.bgImage and issues a single
+ * gDPTextureRectangle covering the frame.
+ */
+void gSPBgRectCopy( u32 bg )
+{
+
+    return; // everything below is dead code
+    u32 address = RSP_SegmentToPhysical( bg );
+    uObjBg *objBg = (uObjBg*)&RDRAM[address];
+
+    gSP.bgImage.address = RSP_SegmentToPhysical( objBg->imagePtr );
+    gSP.bgImage.width = objBg->imageW >> 2;
+    gSP.bgImage.height = objBg->imageH >> 2;
+    gSP.bgImage.format = objBg->imageFmt;
+    gSP.bgImage.size = objBg->imageSiz;
+    gSP.bgImage.palette = objBg->imagePal;
+    gDP.textureMode = TEXTUREMODE_BGIMAGE;
+
+    u16 imageX = objBg->imageX >> 5;
+    u16 imageY = objBg->imageY >> 5;
+
+    s16 frameX = objBg->frameX / 4;
+    s16 frameY = objBg->frameY / 4;
+    u16 frameW = objBg->frameW >> 2;
+    u16 frameH = objBg->frameH >> 2;
+
+    gSPTexture( 1.0f, 1.0f, 0, 0, TRUE );
+
+    gDPTextureRectangle( frameX, frameY, frameX + frameW - 1, frameY + frameH - 1, 0, imageX, imageY, 4, 1 );
+}
+
+void gSPObjRectangle( u32 sp )
+{
+    u32 address = RSP_SegmentToPhysical( sp );
+    uObjSprite *objSprite = (uObjSprite*)&RDRAM[address];
+
+    f32 scaleW = _FIXED2FLOAT( objSprite->scaleW, 10 );
+    f32 scaleH = _FIXED2FLOAT( objSprite->scaleH, 10 );
+    f32 objX = _FIXED2FLOAT( objSprite->objX, 2 );
+    f32 objY = _FIXED2FLOAT( objSprite->objY, 2 );
+    u32 imageW = objSprite->imageW >> 2;
+    u32 imageH = objSprite->imageH >> 2;
+
+    gDPTextureRectangle( objX, objY, objX + imageW / scaleW - 1, objY + imageH / scaleH - 1, 0, 0.0f, 0.0f, scaleW * (gDP.otherMode.cycleType == G_CYC_COPY ? 4.0f : 1.0f), scaleH );
+}
+
+void gSPObjLoadTxtr( u32 tx )
+{
+    u32 address = RSP_SegmentToPhysical( tx );
+    uObjTxtr *objTxtr = (uObjTxtr*)&RDRAM[address];
+
+    if ((gSP.status[objTxtr->block.sid >> 2] & objTxtr->block.mask) != objTxtr->block.flag)
+    {
+        switch (objTxtr->block.type)
+        {
+            case G_OBJLT_TXTRBLOCK:
+                gDPSetTextureImage( 0, 1, 0, objTxtr->block.image );
+                gDPSetTile( 0, 1, 0, objTxtr->block.tmem, 7, 0, 0, 0, 0, 0, 0, 0 );
+                gDPLoadBlock( 7, 0, 0, ((objTxtr->block.tsize + 1) << 3) - 1, objTxtr->block.tline );
+                break;
+            case G_OBJLT_TXTRTILE:
+                gDPSetTextureImage( 0, 1, (objTxtr->tile.twidth + 1) << 1, objTxtr->tile.image );
+                gDPSetTile( 0, 1, (objTxtr->tile.twidth + 1) >> 2, objTxtr->tile.tmem, 7, 0, 0, 0, 0, 0, 0, 0 );
+                gDPLoadTile( 7, 0, 0, (((objTxtr->tile.twidth + 1) << 1) - 1) << 2, (((objTxtr->tile.theight + 1) >> 2) - 1) << 2 );
+                break;
+            case G_OBJLT_TLUT:
+                gDPSetTextureImage( 0, 2, 1, objTxtr->tlut.image );
+                gDPSetTile( 0, 2, 0, objTxtr->tlut.phead, 7, 0, 0, 0, 0, 0, 0, 0 );
+                gDPLoadTLUT( 7, 0, 0, objTxtr->tlut.pnum << 2, 0 );
+                break;
+        }
+        gSP.status[objTxtr->block.sid >> 2] = (gSP.status[objTxtr->block.sid >> 2] & ~objTxtr->block.mask) | (objTxtr->block.flag & objTxtr->block.mask);
+    }
+}
+
+void gSPObjSprite( u32 sp )
+{
+    u32 address = RSP_SegmentToPhysical( sp );
+    uObjSprite *objSprite = (uObjSprite*)&RDRAM[address];
+
+    f32 scaleW = _FIXED2FLOAT( objSprite->scaleW, 10 );
+    f32 scaleH = _FIXED2FLOAT( objSprite->scaleH, 10 );
+    f32 objX = _FIXED2FLOAT( objSprite->objX, 2 );
+    f32 objY = _FIXED2FLOAT( objSprite->objY, 2 );
+    u32 imageW = objSprite->imageW >> 5;
+    u32 imageH = objSprite->imageH >> 5;
+
+    f32 x0 = objX;
+    f32 y0 = objY;
+    f32 x1 = objX + imageW / scaleW - 1;
+    f32 y1 = objY + imageH / scaleH - 1;
+
+    s32 v0=0,v1=1,v2=2,v3=3;
+
+#ifdef __TRIBUFFER_OPT
+    v0 = OGL.triangles.indexmap[v0];
+    v1 = OGL.triangles.indexmap[v1];
+    v2 = OGL.triangles.indexmap[v2];
+    v3 = OGL.triangles.indexmap[v3];
+#endif
+
+    OGL.triangles.vertices[v0].x = gSP.objMatrix.A * x0 + gSP.objMatrix.B * y0 + gSP.objMatrix.X;
+    OGL.triangles.vertices[v0].y = gSP.objMatrix.C * x0 + gSP.objMatrix.D * y0 + gSP.objMatrix.Y;
+    OGL.triangles.vertices[v0].z = 0.0f;
+    OGL.triangles.vertices[v0].w = 1.0f;
+    OGL.triangles.vertices[v0].s = 0.0f;
+    OGL.triangles.vertices[v0].t = 0.0f;
+    OGL.triangles.vertices[v1].x = gSP.objMatrix.A * x1 + gSP.objMatrix.B * y0 + gSP.objMatrix.X;
+    OGL.triangles.vertices[v1].y = gSP.objMatrix.C * x1 + gSP.objMatrix.D * y0 + gSP.objMatrix.Y;
+    OGL.triangles.vertices[v1].z = 0.0f;
+    OGL.triangles.vertices[v1].w = 1.0f;
+    OGL.triangles.vertices[v1].s = imageW - 1;
+    OGL.triangles.vertices[v1].t = 0.0f;
+    OGL.triangles.vertices[v2].x = gSP.objMatrix.A * x1 + gSP.objMatrix.B * y1 + gSP.objMatrix.X;
+    OGL.triangles.vertices[v2].y = gSP.objMatrix.C * x1 + gSP.objMatrix.D * y1 + gSP.objMatrix.Y;
+    OGL.triangles.vertices[v2].z = 0.0f;
+    OGL.triangles.vertices[v2].w = 1.0f;
+    OGL.triangles.vertices[v2].s = imageW - 1;
+    OGL.triangles.vertices[v2].t = imageH - 1;
+    OGL.triangles.vertices[v3].x = gSP.objMatrix.A * x0 + gSP.objMatrix.B * y1 + gSP.objMatrix.X;
+    OGL.triangles.vertices[v3].y = gSP.objMatrix.C * x0 + gSP.objMatrix.D * y1 + gSP.objMatrix.Y;
+    OGL.triangles.vertices[v3].z = 0.0f;
+    OGL.triangles.vertices[v3].w = 1.0f;
+    OGL.triangles.vertices[v3].s = 0;
+    OGL.triangles.vertices[v3].t = imageH - 1;
+
+    gDPSetTile( objSprite->imageFmt, objSprite->imageSiz, objSprite->imageStride, objSprite->imageAdrs, 0, objSprite->imagePal, G_TX_CLAMP, G_TX_CLAMP, 0, 0, 0, 0 );
+    gDPSetTileSize( 0, 0, 0, (imageW - 1) << 2, (imageH - 1) << 2 );
+    gSPTexture( 1.0f, 1.0f, 0, 0, TRUE );
+
+    //glOrtho( 0, VI.width, VI.height, 0, 0.0f, 32767.0f );
+    OGL.triangles.vertices[v0].x = 2.0f * VI.rwidth * OGL.triangles.vertices[v0].x - 1.0f;
+    OGL.triangles.vertices[v0].y = -2.0f * VI.rheight * OGL.triangles.vertices[v0].y + 1.0f;
+    OGL.triangles.vertices[v0].z = -1.0f;
+    OGL.triangles.vertices[v0].w = 1.0f;
+    OGL.triangles.vertices[v1].x = 2.0f * VI.rwidth * OGL.triangles.vertices[v0].x - 1.0f;
+    OGL.triangles.vertices[v1].y = -2.0f * VI.rheight * OGL.triangles.vertices[v0].y + 1.0f;
+    OGL.triangles.vertices[v1].z = -1.0f;
+    OGL.triangles.vertices[v1].w = 1.0f;
+    OGL.triangles.vertices[v2].x = 2.0f * VI.rwidth * OGL.triangles.vertices[v0].x - 1.0f;
+    OGL.triangles.vertices[v2].y = -2.0f * VI.rheight * OGL.triangles.vertices[v0].y + 1.0f;
+    OGL.triangles.vertices[v2].z = -1.0f;
+    OGL.triangles.vertices[v2].w = 1.0f;
+    OGL.triangles.vertices[v3].x = 2.0f * VI.rwidth * OGL.triangles.vertices[v0].x - 1.0f;
+    OGL.triangles.vertices[v3].y = -2.0f * VI.rheight * OGL.triangles.vertices[v0].y + 1.0f;
+    OGL.triangles.vertices[v3].z = -1.0f;
+    OGL.triangles.vertices[v3].w = 1.0f;
+
+    OGL_AddTriangle(v0, v1, v2);
+    OGL_AddTriangle(v0, v2, v3);
+    OGL_DrawTriangles();
+
+    if (depthBuffer.current) depthBuffer.current->cleared = FALSE;
+    gDP.colorImage.changed = TRUE;
+    gDP.colorImage.height = (unsigned int)(max( gDP.colorImage.height, gDP.scissor.lry ));
+}
+
+// Combined command: loads the uObjTxtr at txsp, then draws the uObjSprite
+// that immediately follows it in memory.
+void gSPObjLoadTxSprite( u32 txsp )
+{
+    const u32 spriteAddr = txsp + sizeof( uObjTxtr );
+
+    gSPObjLoadTxtr( txsp );
+    gSPObjSprite( spriteAddr );
+}
+
+// Combined command: only the texture-load half is performed. The
+// rectangle draw that would follow is commented out.
+void gSPObjLoadTxRectR( u32 txsp )
+{
+    gSPObjLoadTxtr( txsp );
+
+    // Disabled:
+//  gSPObjRectangleR( txsp + sizeof( uObjTxtr ) );
+}
+
+void gSPObjMatrix( u32 mtx )
+{
+    u32 address = RSP_SegmentToPhysical( mtx );
+    uObjMtx *objMtx = (uObjMtx*)&RDRAM[address];
+
+    gSP.objMatrix.A = _FIXED2FLOAT( objMtx->A, 16 );
+    gSP.objMatrix.B = _FIXED2FLOAT( objMtx->B, 16 );
+    gSP.objMatrix.C = _FIXED2FLOAT( objMtx->C, 16 );
+    gSP.objMatrix.D = _FIXED2FLOAT( objMtx->D, 16 );
+    gSP.objMatrix.X = _FIXED2FLOAT( objMtx->X, 2 );
+    gSP.objMatrix.Y = _FIXED2FLOAT( objMtx->Y, 2 );
+    gSP.objMatrix.baseScaleX = _FIXED2FLOAT( objMtx->BaseScaleX, 10 );
+    gSP.objMatrix.baseScaleY = _FIXED2FLOAT( objMtx->BaseScaleY, 10 );
+}
+
+// Stub: the sub-matrix address is accepted but nothing is done with it.
+void gSPObjSubMatrix( u32 mtx )
+{
+    (void)mtx;
+}
+
+/*
+ * Function pointers for the vertex-processing routines, initialised here
+ * to the *_default implementations. The four-vertex variants exist only
+ * when __VEC4_OPT is defined.
+ * NOTE(review): presumably re-pointed to optimized implementations
+ * elsewhere - not visible in this part of the file.
+ */
+#ifdef __VEC4_OPT
+void (*gSPTransformVertex4)(u32 v, float mtx[4][4]) =
+        gSPTransformVertex4_default;
+void (*gSPTransformNormal4)(u32 v, float mtx[4][4]) =
+        gSPTransformNormal4_default;
+void (*gSPLightVertex4)(u32 v) = gSPLightVertex4_default;
+void (*gSPBillboardVertex4)(u32 v) = gSPBillboardVertex4_default;
+#endif
+void (*gSPTransformVertex)(float vtx[4], float mtx[4][4]) =
+        gSPTransformVertex_default;
+void (*gSPLightVertex)(u32 v) = gSPLightVertex_default;
+void (*gSPBillboardVertex)(u32 v, u32 i) = gSPBillboardVertex_default;
+
diff --git a/source/gles2n64/src/gSP.h b/source/gles2n64/src/gSP.h
new file mode 100644 (file)
index 0000000..6498224
--- /dev/null
@@ -0,0 +1,264 @@
+#ifndef GSP_H
+#define GSP_H
+
+#include "Types.h"
+#include "GBI.h"
+#include "gDP.h"
+
+// State-change flags.  Each value is a distinct single bit, so several flags
+// can be combined in one word.
+// NOTE(review): presumably OR-ed into gSPInfo::changed -- confirm against
+// the users in gSP.cpp.
+#define CHANGED_VIEWPORT        0x01
+#define CHANGED_MATRIX          0x02
+#define CHANGED_COLORBUFFER     0x04
+#define CHANGED_GEOMETRYMODE    0x08
+#define CHANGED_TEXTURE         0x10
+#define CHANGED_FOGPOSITION     0x20
+#define CHANGED_TEXTURESCALE    0x40
+
+//#ifdef __TRIBUFFER_OPT
+//    #define gSPFlushTriangles() \
+//    if \
+//    ( \
+//        (OGL.triangles.num > 1000) || \
+//        ( \
+//            (RSP.nextCmd != G_NOOP) && \
+//            (RSP.nextCmd != G_RDPNOOP) && \
+//            (RSP.nextCmd != G_MOVEMEM) && \
+//            (RSP.nextCmd != G_ENDDL) && \
+//            (RSP.nextCmd != G_DL) && \
+//            (RSP.nextCmd != G_VTXCOLORBASE) && \
+//            (RSP.nextCmd != G_TRI1) && \
+//            (RSP.nextCmd != G_TRI2) && \
+//            (RSP.nextCmd != G_TRI4) && \
+//            (RSP.nextCmd != G_QUAD) && \
+//            (RSP.nextCmd != G_VTX) && \
+//            (RSP.nextCmd != G_MTX) \
+//        ) \
+//    ) \
+//    { \
+//        OGL_DrawTriangles(); \
+//    }
+//#else
+//    #define gSPFlushTriangles() \
+//    if \
+//    ( \
+//        (RSP.nextCmd != G_TRI1) && \
+//        (RSP.nextCmd != G_TRI2) && \
+//        (RSP.nextCmd != G_TRI4) && \
+//        (RSP.nextCmd != G_QUAD) \
+//    ) \
+//    { \
+//        OGL_DrawTriangles(); \
+//    }
+//#endif
+#define gSPFlushTriangles() \
+if \
+( \
+    ( \
+         (config.tribufferOpt) && \
+         (OGL.triangles.num > 1000) || \
+         ( \
+             (RSP.nextCmd != G_NOOP) && \
+             (RSP.nextCmd != G_RDPNOOP) && \
+             (RSP.nextCmd != G_MOVEMEM) && \
+             (RSP.nextCmd != G_ENDDL) && \
+             (RSP.nextCmd != G_DL) && \
+             (RSP.nextCmd != G_VTXCOLORBASE) && \
+             (RSP.nextCmd != G_TRI1) && \
+             (RSP.nextCmd != G_TRI2) && \
+             (RSP.nextCmd != G_TRI4) && \
+             (RSP.nextCmd != G_QUAD) && \
+             (RSP.nextCmd != G_VTX) && \
+             (RSP.nextCmd != G_MTX) \
+         ) \
+    ) || \
+    ( \
+        (RSP.nextCmd != G_TRI1) && \
+        (RSP.nextCmd != G_TRI2) && \
+        (RSP.nextCmd != G_TRI4) && \
+        (RSP.nextCmd != G_QUAD) \
+    ) \
+) \
+{ \
+    OGL_DrawTriangles(); \
+}
+
+
+// Clip flag bits.  Every axis has a NEG and a POS bit, and the per-axis mask
+// (CLIP_X / CLIP_Y / CLIP_Z) is the OR of those two bits.
+// NOTE(review): presumably stored in SPVertex::clip -- confirm.
+#define CLIP_X      0x03
+#define CLIP_NEGX   0x01
+#define CLIP_POSX   0x02
+
+#define CLIP_Y      0x0C
+#define CLIP_NEGY   0x04
+#define CLIP_POSY   0x08
+
+#define CLIP_Z      0x30
+#define CLIP_NEGZ   0x10
+#define CLIP_POSZ   0x20
+
+// Per-vertex record; the NEON routines in gSPNeon.cpp step through
+// OGL.triangles.vertices in units of sizeof(SPVertex).
+//
+// Layout matters: those routines load and store {x,y,z,w} and
+// {nx,ny,nz,__pad0} as whole 4-float vectors (assuming f32 is a 32-bit
+// float), so neither the field order nor the padding may change.
+struct SPVertex
+{
+    f32     x, y, z, w;             // position
+    f32     nx, ny, nz, __pad0;     // normal; __pad0 fills the 4th vector lane
+    f32     r, g, b, a;             // colour
+    f32     s, t;                   // NOTE(review): presumably texture coordinates -- confirm
+
+    u32     clip;                   // NOTE(review): presumably CLIP_* bits -- confirm
+    s16     flag;
+    s16     __pad1;                 // explicit padding after flag
+};
+
+// Three vertices stored by value.
+typedef SPVertex SPTriangle[3];
+
+// One light: a colour followed by a three-component vector.
+//
+// gSPLightVertex4NEON (gSPNeon.cpp) reads these six floats by position
+// ({r,g}, then b, then {x,y}, then z) and moves from one light to the next
+// by sizeof(SPLight), so the field order must not change.
+struct SPLight
+{
+    f32 r, g, b;    // colour
+    f32 x, y, z;    // vector that is dotted with the vertex normal
+};
+
+// State block for the gSP layer; the single instance is the global `gSP`.
+struct gSPInfo
+{
+    u32 segment[16];    // NOTE(review): presumably the segment table used by RSP_SegmentToPhysical -- confirm
+
+    // Matrix state.
+    struct
+    {
+        u32 modelViewi, stackSize, billboard;   // modelViewi selects the current entry of modelView[]
+        f32 modelView[32][4][4];                // up to 32 model-view matrices
+        f32 projection[4][4];
+        f32 combined[4][4];                     // NOTE(review): presumably model-view * projection -- confirm
+    } matrix;
+
+    // Filled by gSPObjMatrix() from a uObjMtx record.
+    struct
+    {
+        f32 A, B, C, D;
+        f32 X, Y;
+        f32 baseScaleX, baseScaleY;
+    } objMatrix;
+
+    u32 vertexColorBase;
+    u32 vertexi;
+
+    // gSPLightVertex4NEON reads lights[0 .. numLights-1] as the lights to
+    // apply and takes the colour of lights[numLights] as the starting colour.
+    SPLight lights[8];
+
+    struct
+    {
+        f32 scales, scalet;
+        s32 level, on, tile;
+    } texture;
+
+    gDPTile *textureTile[2];
+
+    struct
+    {
+        f32 vscale[4];
+        f32 vtrans[4];
+        f32 x, y, width, height;
+        f32 nearz, farz;
+    } viewport;
+
+    struct
+    {
+        s16 multiplier, offset;
+    } fog;
+
+    struct
+    {
+        u32 address, width, height, format, size, palette;
+    } bgImage;
+
+    u32 geometryMode;
+    s32 numLights;      // number of entries of lights[] looped over when lighting
+
+    u32 changed;        // NOTE(review): presumably a mask of CHANGED_* bits -- confirm
+
+    u32 status[4];
+
+    struct
+    {
+        u32 vtx, mtx;
+    } DMAOffsets;
+};
+
+// The one gSP state instance (defined outside this header).
+extern gSPInfo gSP;
+
+// ---- gSP command handlers ----
+void gSPLoadUcodeEx( u32 uc_start, u32 uc_dstart, u16 uc_dsize );
+void gSPNoOp();
+void gSPMatrix( u32 matrix, u8 param );
+void gSPDMAMatrix( u32 matrix, u8 index, u8 multiply );
+void gSPViewport( u32 v );
+void gSPForceMatrix( u32 mptr );
+void gSPLight( u32 l, s32 n );
+void gSPLookAt( u32 l );
+void gSPVertex( u32 v, u32 n, u32 v0 );
+void gSPCIVertex( u32 v, u32 n, u32 v0 );
+void gSPDMAVertex( u32 v, u32 n, u32 v0 );
+void gSPDisplayList( u32 dl );
+void gSPDMADisplayList( u32 dl, u32 n );
+void gSPBranchList( u32 dl );
+void gSPBranchLessZ( u32 branchdl, u32 vtx, f32 zval );
+void gSPSprite2DBase( u32 base );
+void gSPDMATriangles( u32 tris, u32 n );
+void gSP1Quadrangle( s32 v0, s32 v1, s32 v2, s32 v3 );
+void gSPCullDisplayList( u32 v0, u32 vn );
+void gSPPopMatrix( u32 param );
+void gSPPopMatrixN( u32 param, u32 num );
+void gSPSegment( s32 seg, s32 base );
+void gSPClipRatio( u32 r );
+void gSPInsertMatrix( u32 where, u32 num );
+void gSPModifyVertex( u32 vtx, u32 where, u32 val );
+void gSPNumLights( s32 n );
+void gSPLightColor( u32 lightNum, u32 packedColor );
+void gSPFogFactor( s16 fm, s16 fo );
+void gSPPerspNormalize( u16 scale );
+void gSPTexture( f32 sc, f32 tc, s32 level, s32 tile, s32 on );
+void gSPEndDisplayList();
+void gSPGeometryMode( u32 clear, u32 set );
+void gSPSetGeometryMode( u32 mode );
+void gSPClearGeometryMode( u32 mode );
+void gSPLine3D( s32 v0, s32 v1, s32 flag );
+void gSPLineW3D( s32 v0, s32 v1, s32 wd, s32 flag );
+// Object / background commands.
+void gSPObjRectangle( u32 sp );
+void gSPObjSprite( u32 sp );
+void gSPObjLoadTxtr( u32 tx );
+void gSPObjLoadTxSprite( u32 txsp );
+void gSPObjLoadTxRectR( u32 txsp );
+void gSPBgRect1Cyc( u32 bg );
+void gSPBgRectCopy( u32 bg );
+void gSPObjMatrix( u32 mtx );
+void gSPObjSubMatrix( u32 mtx );
+void gSPSetDMAOffsets( u32 mtxoffset, u32 vtxoffset );
+void gSPSetVertexColorBase( u32 base );
+void gSPProcessVertex(u32 v);
+
+void gSPTriangleUnknown();
+
+// Triangle submission.
+void gSP1Triangle(s32 v0, s32 v1, s32 v2);
+void gSP2Triangles(const s32 v00, const s32 v01, const s32 v02, const s32 flag0,
+                    const s32 v10, const s32 v11, const s32 v12, const s32 flag1 );
+void gSP4Triangles(const s32 v00, const s32 v01, const s32 v02,
+                    const s32 v10, const s32 v11, const s32 v12,
+                    const s32 v20, const s32 v21, const s32 v22,
+                    const s32 v30, const s32 v31, const s32 v32 );
+
+
+// Index-map helpers.  The "#ifdef __TRIBUFFER_OPT" guard that used to wrap
+// them is disabled, so they are declared unconditionally.
+void __indexmap_init();
+void __indexmap_clear();
+u32 __indexmap_findunused(u32 num);
+u32 __indexmap_getnew(u32 index, u32 num);
+// (end of the former __TRIBUFFER_OPT section)
+
+// Vertex pipeline entry points, called through function pointers.
+#ifdef __VEC4_OPT
+// Four-vertex variants; only declared when __VEC4_OPT is defined.
+extern void (*gSPTransformVertex4)(u32 v, float mtx[4][4]);
+extern void (*gSPTransformNormal4)(u32 v, float mtx[4][4]);
+extern void (*gSPLightVertex4)(u32 v);
+extern void (*gSPBillboardVertex4)(u32 v);
+#endif
+extern void (*gSPTransformVertex)(float vtx[4], float mtx[4][4]);
+extern void (*gSPLightVertex)(u32 v);
+extern void (*gSPBillboardVertex)(u32 v, u32 i);
+
+#ifdef __NEON_OPT
+// Only declared when __NEON_OPT is defined.
+void gSPInitNeon();
+#endif
+
+#endif
+
diff --git a/source/gles2n64/src/gSPNeon.cpp b/source/gles2n64/src/gSPNeon.cpp
new file mode 100644 (file)
index 0000000..109aba4
--- /dev/null
@@ -0,0 +1,563 @@
+#include "gSP.h"
+#include "OpenGL.h"
+
+#ifdef __VEC4_OPT
+// Transforms the positions of the four consecutive vertices v .. v+3 of
+// OGL.triangles.vertices by mtx, in place.
+//
+// For every vertex the result is
+//     mtx[3] + x*mtx[0] + y*mtx[1] + z*mtx[2]
+// i.e. the incoming w component is not read, and all four output lanes
+// {x,y,z,w} are overwritten.
+//
+// The "I" operands are sizeof(SPVertex) and 3*sizeof(SPVertex); they are the
+// byte strides used to walk from one vertex to the next and back again.
+static void gSPTransformVertex4NEON(u32 v, float mtx[4][4])
+{
+    float *ptr = &OGL.triangles.vertices[v].x;
+
+// Disabled alternative that uses 128-bit alignment hints and interleaves the
+// loads; kept for reference only, it is never compiled.
+#if 0
+    volatile int tmp0, tmp1;
+       asm volatile (
+    "vld1.32           {d0, d1}, [%1, :128]                    \n\t"   //q0 = {x,y,z,w}
+    "add                   %1, %1, %4                          \n\t"   //q0 = {x,y,z,w}
+    "vld1.32           {d18, d19}, [%0, :128]!         \n\t"   //q9 = m
+    "vld1.32           {d2, d3}, [%1, :128]            \n\t"   //q1 = {x,y,z,w}
+    "add                   %1, %1, %4                  \n\t"   //q0 = {x,y,z,w}
+    "vld1.32           {d20, d21}, [%0, :128]!       \n\t"     //q10 = m
+    "vld1.32           {d4, d5}, [%1, :128]            \n\t"   //q2 = {x,y,z,w}
+    "add                   %1, %1, %4                  \n\t"   //q0 = {x,y,z,w}
+    "vld1.32           {d22, d23}, [%0, :128]!       \n\t"     //q11 = m
+    "vld1.32           {d6, d7}, [%1, :128]            \n\t"   //q3 = {x,y,z,w}
+    "vld1.32           {d24, d25}, [%0, :128]        \n\t"     //q12 = m
+    "sub                   %1, %1, %6                          \n\t"   //q0 = {x,y,z,w}
+
+    "vmov.f32          q13, q12                        \n\t"   //q13 = q12
+    "vmov.f32          q14, q12                        \n\t"   //q14 = q12
+    "vmov.f32          q15, q12                        \n\t"   //q15 = q12
+
+    "vmla.f32          q12, q9, d0[0]                  \n\t"   //q12 = q9*d0[0]
+    "vmla.f32          q13, q9, d2[0]                  \n\t"   //q13 = q9*d0[0]
+    "vmla.f32          q14, q9, d4[0]                  \n\t"   //q14 = q9*d0[0]
+    "vmla.f32          q15, q9, d6[0]                  \n\t"   //q15 = q9*d0[0]
+    "vmla.f32          q12, q10, d0[1]                 \n\t"   //q12 = q10*d0[1]
+    "vmla.f32          q13, q10, d2[1]                 \n\t"   //q13 = q10*d0[1]
+    "vmla.f32          q14, q10, d4[1]                 \n\t"   //q14 = q10*d0[1]
+    "vmla.f32          q15, q10, d6[1]                 \n\t"   //q15 = q10*d0[1]
+    "vmla.f32          q12, q11, d1[0]                 \n\t"   //q12 = q11*d1[0]
+    "vmla.f32          q13, q11, d3[0]                 \n\t"   //q13 = q11*d1[0]
+    "vmla.f32          q14, q11, d5[0]                 \n\t"   //q14 = q11*d1[0]
+    "vmla.f32          q15, q11, d7[0]                 \n\t"   //q15 = q11*d1[0]
+
+    "add                   %0, %1, %4                  \n\t"   //q0 = {x,y,z,w}
+    "add                   %2, %1, %5                  \n\t"   //q0 = {x,y,z,w}
+    "add                   %3, %1, %6                  \n\t"   //q0 = {x,y,z,w}
+    "vst1.32           {d24, d25}, [%1, :128]          \n\t"   //q12
+    "vst1.32           {d26, d27}, [%0, :128]      \n\t"       //q13
+    "vst1.32           {d28, d29}, [%2, :128]      \n\t"       //q14
+    "vst1.32           {d30, d31}, [%3, :128]          \n\t"   //q15
+       : "+&r"(mtx), "+&r"(ptr), "+r"(tmp0), "+r"(tmp1)
+       : "I"(sizeof(SPVertex)),"I"(2 * sizeof(SPVertex)), "I"(3 * sizeof(SPVertex))
+    : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
+      "d18","d19", "d20", "d21", "d22", "d23", "d24",
+      "d25", "d26", "d27", "d28", "d29", "d30", "d31", "memory"
+       );
+#else
+       // %0 = mtx, %1 = ptr, %2 = sizeof(SPVertex), %3 = 3*sizeof(SPVertex)
+       asm volatile (
+       "vld1.32                {d0, d1}, [%1]                  \n\t"   //q0 = {x,y,z,w} of vertex v
+       "add                %1, %1, %2                          \n\t"   //ptr += sizeof(SPVertex)
+       "vld1.32                {d2, d3}, [%1]          \n\t"   //q1 = {x,y,z,w} of vertex v+1
+       "add                %1, %1, %2                  \n\t"   //ptr += sizeof(SPVertex)
+       "vld1.32                {d4, d5}, [%1]          \n\t"   //q2 = {x,y,z,w} of vertex v+2
+       "add                %1, %1, %2                  \n\t"   //ptr += sizeof(SPVertex)
+       "vld1.32                {d6, d7}, [%1]          \n\t"   //q3 = {x,y,z,w} of vertex v+3
+    "sub                   %1, %1, %3                          \n\t"   //ptr -= 3*sizeof(SPVertex), back at vertex v
+
+       "vld1.32                {d18, d19}, [%0]!               \n\t"   //q9 = mtx[0]
+       "vld1.32                {d20, d21}, [%0]!       \n\t"   //q10 = mtx[1]
+       "vld1.32                {d22, d23}, [%0]!       \n\t"   //q11 = mtx[2]
+       "vld1.32                {d24, d25}, [%0]        \n\t"   //q12 = mtx[3]
+
+       "vmov.f32               q13, q12                        \n\t"   //q13 = q12
+       "vmov.f32               q14, q12                        \n\t"   //q14 = q12
+       "vmov.f32               q15, q12                        \n\t"   //q15 = q12
+
+       "vmla.f32               q12, q9, d0[0]                  \n\t"   //q12 += q9*d0[0]  (x of v)
+       "vmla.f32               q13, q9, d2[0]                  \n\t"   //q13 += q9*d2[0]  (x of v+1)
+       "vmla.f32               q14, q9, d4[0]                  \n\t"   //q14 += q9*d4[0]  (x of v+2)
+       "vmla.f32               q15, q9, d6[0]                  \n\t"   //q15 += q9*d6[0]  (x of v+3)
+       "vmla.f32               q12, q10, d0[1]                 \n\t"   //q12 += q10*d0[1] (y of v)
+       "vmla.f32               q13, q10, d2[1]                 \n\t"   //q13 += q10*d2[1] (y of v+1)
+       "vmla.f32               q14, q10, d4[1]                 \n\t"   //q14 += q10*d4[1] (y of v+2)
+       "vmla.f32               q15, q10, d6[1]                 \n\t"   //q15 += q10*d6[1] (y of v+3)
+       "vmla.f32               q12, q11, d1[0]                 \n\t"   //q12 += q11*d1[0] (z of v)
+       "vmla.f32               q13, q11, d3[0]                 \n\t"   //q13 += q11*d3[0] (z of v+1)
+       "vmla.f32               q14, q11, d5[0]                 \n\t"   //q14 += q11*d5[0] (z of v+2)
+       "vmla.f32               q15, q11, d7[0]                 \n\t"   //q15 += q11*d7[0] (z of v+3)
+
+       "vst1.32                {d24, d25}, [%1]                \n\t"   //vertex v   = q12
+       "add                %1, %1, %2                  \n\t"   //ptr += sizeof(SPVertex)
+       "vst1.32                {d26, d27}, [%1]            \n\t"       //vertex v+1 = q13
+       "add                %1, %1, %2                  \n\t"   //ptr += sizeof(SPVertex)
+       "vst1.32                {d28, d29}, [%1]            \n\t"       //vertex v+2 = q14
+       "add                %1, %1, %2                          \n\t"   //ptr += sizeof(SPVertex)
+       "vst1.32                {d30, d31}, [%1]        \n\t"   //vertex v+3 = q15
+
+       : "+&r"(mtx), "+&r"(ptr)
+       : "I"(sizeof(SPVertex)), "I"(3 * sizeof(SPVertex))
+    : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
+      "d18","d19", "d20", "d21", "d22", "d23", "d24",
+      "d25", "d26", "d27", "d28", "d29", "d30", "d31", "memory"
+       );
+#endif
+}
+
+// Transforms the normals of the four consecutive vertices v .. v+3 by the
+// first three rows of mtx and scales each result to unit length, in place.
+//
+// Steps:
+//   1. n' = nx*mtx[0] + ny*mtx[1] + nz*mtx[2]   (mtx[3] is not read)
+//   2. len2 = n'.x^2 + n'.y^2 + n'.z^2 for each of the four normals
+//   3. 1/sqrt(len2) from VRSQRTE refined by two VRSQRTS steps
+//   4. n' is multiplied by that reciprocal and stored back
+//
+// All four lanes are stored, so __pad0 of every vertex is overwritten too.
+// NOTE(review): a zero-length normal is not special-cased here -- confirm
+// that callers never pass one, or that the result is acceptable.
+static void gSPTransformNormal4NEON(u32 v, float mtx[4][4])
+{
+    void *ptr = (void*)&OGL.triangles.vertices[v].nx;
+       // %0 = mtx, %1 = ptr, %2 = sizeof(SPVertex), %3 = 3*sizeof(SPVertex)
+       asm volatile (
+    "vld1.32           {d0, d1}, [%1]                  \n\t"   //q0 = {nx,ny,nz,pad} of vertex v
+       "add                %1, %1, %2                          \n\t"   //ptr += sizeof(SPVertex)
+       "vld1.32                {d2, d3}, [%1]          \n\t"   //q1 = {nx,ny,nz,pad} of vertex v+1
+       "add                %1, %1, %2                          \n\t"   //ptr += sizeof(SPVertex)
+       "vld1.32                {d4, d5}, [%1]          \n\t"   //q2 = {nx,ny,nz,pad} of vertex v+2
+       "add                %1, %1, %2                          \n\t"   //ptr += sizeof(SPVertex)
+       "vld1.32                {d6, d7}, [%1]          \n\t"   //q3 = {nx,ny,nz,pad} of vertex v+3
+    "sub                   %1, %1, %3                          \n\t"   //ptr -= 3*sizeof(SPVertex), back at vertex v
+
+       "vld1.32                {d18, d19}, [%0]!               \n\t"   //q9 = m
+       "vld1.32                {d20, d21}, [%0]!           \n\t"       //q10 = m+16
+       "vld1.32                {d22, d23}, [%0]        \n\t"   //q11 = m+32
+
+       "vmul.f32               q12, q9, d0[0]                  \n\t"   //q12 = q9*d0[0]
+       "vmul.f32               q13, q9, d2[0]                  \n\t"   //q13 = q9*d2[0]
+    "vmul.f32          q14, q9, d4[0]                  \n\t"   //q14 = q9*d4[0]
+    "vmul.f32          q15, q9, d6[0]                  \n\t"   //q15 = q9*d6[0]
+
+    "vmla.f32          q12, q10, d0[1]                 \n\t"   //q12 += q10*d0[1]
+    "vmla.f32          q13, q10, d2[1]                 \n\t"   //q13 += q10*d2[1]
+    "vmla.f32          q14, q10, d4[1]                 \n\t"   //q14 += q10*d4[1]
+    "vmla.f32          q15, q10, d6[1]                 \n\t"   //q15 += q10*d6[1]
+
+       "vmla.f32               q12, q11, d1[0]                 \n\t"   //q12 += q11*d1[0]
+       "vmla.f32               q13, q11, d3[0]                 \n\t"   //q13 += q11*d3[0]
+       "vmla.f32               q14, q11, d5[0]                 \n\t"   //q14 += q11*d5[0]
+       "vmla.f32               q15, q11, d7[0]                 \n\t"   //q15 += q11*d7[0]
+
+    "vmul.f32          q0, q12, q12                    \n\t"   //q0 = q12*q12
+    "vmul.f32          q1, q13, q13                    \n\t"   //q1 = q13*q13
+    "vmul.f32          q2, q14, q14                    \n\t"   //q2 = q14*q14
+    "vmul.f32          q3, q15, q15                    \n\t"   //q3 = q15*q15
+
+    "vpadd.f32                 d0, d0                                  \n\t"   //d0[0] = d0[0] + d0[1]
+    "vpadd.f32                 d2, d2                                  \n\t"   //d2[0] = d2[0] + d2[1]
+    "vpadd.f32                 d4, d4                                  \n\t"   //d4[0] = d4[0] + d4[1]
+    "vpadd.f32                 d6, d6                                  \n\t"   //d6[0] = d6[0] + d6[1]
+
+    "vmov.f32          s1, s2                                  \n\t"   //d0[1] = d1[0]
+    "vmov.f32      s5, s6                              \n\t"   //d2[1] = d3[0]
+    "vmov.f32      s9, s10                             \n\t"   //d4[1] = d5[0]
+    "vmov.f32          s13, s14                                \n\t"   //d6[1] = d7[0]
+
+    "vpadd.f32                 d0, d0, d2                      \n\t"   //d0 = {d0[0] + d0[1], d2[0] + d2[1]}
+    "vpadd.f32                 d1, d4, d6                      \n\t"   //d1 = {d4[0] + d4[1], d6[0] + d6[1]}
+
+       "vmov.f32               q1, q0                                  \n\t"   //q1 = q0 (the four squared lengths)
+       "vrsqrte.f32    q0, q0                                  \n\t"   //q0 = ~ 1.0 / sqrt(q0)
+       "vmul.f32               q2, q0, q1                              \n\t"   //q2 = q0 * q1
+       "vrsqrts.f32    q3, q2, q0                              \n\t"   //q3 = (3 - q0 * q2) / 2
+       "vmul.f32               q0, q0, q3                              \n\t"   //q0 = q0 * q3
+       "vmul.f32               q2, q0, q1                              \n\t"   //q2 = q0 * q1
+       "vrsqrts.f32    q3, q2, q0                              \n\t"   //q3 = (3 - q0 * q2) / 2
+       "vmul.f32               q0, q0, q3                              \n\t"   //q0 = q0 * q3
+
+       // q0 is scaled last because d0/d1 still hold the four reciprocals.
+       "vmul.f32               q3, q15, d1[1]                  \n\t"   //q3 = q15*d1[1]
+       "vmul.f32               q2, q14, d1[0]                  \n\t"   //q2 = q14*d1[0]
+       "vmul.f32               q1, q13, d0[1]                  \n\t"   //q1 = q13*d0[1]
+       "vmul.f32               q0, q12, d0[0]                  \n\t"   //q0 = q12*d0[0]
+
+       "vst1.32                {d0, d1}, [%1]              \n\t"       //d0={nx,ny,nz,pad}
+       "add                %1, %1, %2                          \n\t"   //ptr += sizeof(SPVertex)
+       "vst1.32                {d2, d3}, [%1]              \n\t"       //d2={nx,ny,nz,pad}
+       "add                %1, %1, %2                          \n\t"   //ptr += sizeof(SPVertex)
+       "vst1.32                {d4, d5}, [%1]              \n\t"       //d4={nx,ny,nz,pad}
+       "add                %1, %1, %2                          \n\t"   //ptr += sizeof(SPVertex)
+    "vst1.32           {d6, d7}, [%1]          \n\t"   //d6={nx,ny,nz,pad}
+
+    : "+&r"(mtx), "+&r"(ptr)
+    : "I"(sizeof(SPVertex)), "I"(3 * sizeof(SPVertex))
+    : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
+      "d16","d17", "d18","d19", "d20", "d21", "d22",
+      "d23", "d24", "d25", "d26", "d27", "d28", "d29",
+      "d30", "d31", "memory"
+       );
+}
+
+// Computes the lit colour of the four consecutive vertices v .. v+3.
+//
+// First asm statement: transforms the four normals by the first three rows
+// of the current model-view matrix (gSP.matrix.modelView[modelViewi]),
+// normalises them and writes them back to the vertices.  It leaves the four
+// unit normals in q0..q3.
+//
+// Second asm statement: for each vertex, starts from the colour of
+// gSP.lights[gSP.numLights] and, for each of the first numLights lights,
+// adds light.{r,g,b} * max(dot(normal, light.{x,y,z}), 0).  The sums are
+// clamped to at most 1.0 and written to `result` (4 floats per vertex; the
+// 4th float of each group is not meaningful).
+//
+// Finally r, g, b of each vertex are copied out of `result`; the alpha
+// component is not touched.
+//
+// NOTE(review): the second asm statement takes its normals from q0..q3 as
+// left behind by the first one, but those registers are not operands of
+// either statement (the first one even lists d0-d7 as clobbered).  This
+// relies on the compiler emitting nothing that uses NEON registers between
+// the two statements -- confirm, or merge them into a single statement.
+static void gSPLightVertex4NEON(u32 v)
+{
+    volatile float result[16];
+
+       volatile int i = gSP.numLights;
+    volatile int tmp = 0;
+    volatile void *ptr0 = &(gSP.lights[0].r);
+    volatile void *ptr1 = &(OGL.triangles.vertices[v].nx);
+    volatile void *ptr2 = result;
+       volatile void *ptr3 = gSP.matrix.modelView[gSP.matrix.modelViewi];
+       // %0 = ptr3 (matrix), %1 = ptr1 (normals), %2 = sizeof(SPVertex), %3 = 3*sizeof(SPVertex)
+       asm volatile (
+    "vld1.32           {d0, d1}, [%1]                  \n\t"   //q0 = {nx,ny,nz,pad} of vertex v
+       "add                %1, %1, %2                  \n\t"   //ptr1 += sizeof(SPVertex)
+       "vld1.32                {d2, d3}, [%1]          \n\t"   //q1 = {nx,ny,nz,pad} of vertex v+1
+       "add                %1, %1, %2                  \n\t"   //ptr1 += sizeof(SPVertex)
+       "vld1.32                {d4, d5}, [%1]          \n\t"   //q2 = {nx,ny,nz,pad} of vertex v+2
+       "add                %1, %1, %2                          \n\t"   //ptr1 += sizeof(SPVertex)
+       "vld1.32                {d6, d7}, [%1]          \n\t"   //q3 = {nx,ny,nz,pad} of vertex v+3
+    "sub                   %1, %1, %3                          \n\t"   //ptr1 -= 3*sizeof(SPVertex), back at vertex v
+
+       "vld1.32                {d18, d19}, [%0]!               \n\t"   //q9 = m
+       "vld1.32                {d20, d21}, [%0]!           \n\t"       //q10 = m+16
+       "vld1.32                {d22, d23}, [%0]        \n\t"   //q11 = m+32
+
+       "vmul.f32               q12, q9, d0[0]                  \n\t"   //q12 = q9*d0[0]
+       "vmul.f32               q13, q9, d2[0]                  \n\t"   //q13 = q9*d2[0]
+    "vmul.f32          q14, q9, d4[0]                  \n\t"   //q14 = q9*d4[0]
+    "vmul.f32          q15, q9, d6[0]                  \n\t"   //q15 = q9*d6[0]
+
+    "vmla.f32          q12, q10, d0[1]                 \n\t"   //q12 += q10*d0[1]
+    "vmla.f32          q13, q10, d2[1]                 \n\t"   //q13 += q10*d2[1]
+    "vmla.f32          q14, q10, d4[1]                 \n\t"   //q14 += q10*d4[1]
+    "vmla.f32          q15, q10, d6[1]                 \n\t"   //q15 += q10*d6[1]
+
+       "vmla.f32               q12, q11, d1[0]                 \n\t"   //q12 += q11*d1[0]
+       "vmla.f32               q13, q11, d3[0]                 \n\t"   //q13 += q11*d3[0]
+       "vmla.f32               q14, q11, d5[0]                 \n\t"   //q14 += q11*d5[0]
+       "vmla.f32               q15, q11, d7[0]                 \n\t"   //q15 += q11*d7[0]
+
+    "vmul.f32          q0, q12, q12                    \n\t"   //q0 = q12*q12
+    "vmul.f32          q1, q13, q13                    \n\t"   //q1 = q13*q13
+    "vmul.f32          q2, q14, q14                    \n\t"   //q2 = q14*q14
+    "vmul.f32          q3, q15, q15                    \n\t"   //q3 = q15*q15
+
+    "vpadd.f32                 d0, d0                                  \n\t"   //d0[0] = d0[0] + d0[1]
+    "vpadd.f32                 d2, d2                                  \n\t"   //d2[0] = d2[0] + d2[1]
+    "vpadd.f32                 d4, d4                                  \n\t"   //d4[0] = d4[0] + d4[1]
+    "vpadd.f32                 d6, d6                                  \n\t"   //d6[0] = d6[0] + d6[1]
+
+    "vmov.f32          s1, s2                                  \n\t"   //d0[1] = d1[0]
+    "vmov.f32      s5, s6                              \n\t"   //d2[1] = d3[0]
+    "vmov.f32      s9, s10                             \n\t"   //d4[1] = d5[0]
+    "vmov.f32          s13, s14                                \n\t"   //d6[1] = d7[0]
+
+    "vpadd.f32                 d0, d0, d2                      \n\t"   //d0 = {d0[0] + d0[1], d2[0] + d2[1]}
+    "vpadd.f32                 d1, d4, d6                      \n\t"   //d1 = {d4[0] + d4[1], d6[0] + d6[1]}
+
+       "vmov.f32               q1, q0                                  \n\t"   //q1 = q0 (the four squared lengths)
+       "vrsqrte.f32    q0, q0                                  \n\t"   //q0 = ~ 1.0 / sqrt(q0)
+       "vmul.f32               q2, q0, q1                              \n\t"   //q2 = q0 * q1
+       "vrsqrts.f32    q3, q2, q0                              \n\t"   //q3 = (3 - q0 * q2) / 2
+       "vmul.f32               q0, q0, q3                              \n\t"   //q0 = q0 * q3
+       "vmul.f32               q2, q0, q1                              \n\t"   //q2 = q0 * q1
+       "vrsqrts.f32    q3, q2, q0                              \n\t"   //q3 = (3 - q0 * q2) / 2
+       "vmul.f32               q0, q0, q3                              \n\t"   //q0 = q0 * q3
+
+       "vmul.f32               q3, q15, d1[1]                  \n\t"   //q3 = q15*d1[1]
+       "vmul.f32               q2, q14, d1[0]                  \n\t"   //q2 = q14*d1[0]
+       "vmul.f32               q1, q13, d0[1]                  \n\t"   //q1 = q13*d0[1]
+       "vmul.f32               q0, q12, d0[0]                  \n\t"   //q0 = q12*d0[0]
+
+       "vst1.32                {d0, d1}, [%1]              \n\t"       //d0={nx,ny,nz,pad}
+       "add                %1, %1, %2                      \n\t"       //ptr1 += sizeof(SPVertex)
+       "vst1.32                {d2, d3}, [%1]              \n\t"       //d2={nx,ny,nz,pad}
+       "add                %1, %1, %2                      \n\t"       //ptr1 += sizeof(SPVertex)
+       "vst1.32                {d4, d5}, [%1]              \n\t"       //d4={nx,ny,nz,pad}
+       "add                %1, %1, %2                      \n\t"       //ptr1 += sizeof(SPVertex)
+    "vst1.32           {d6, d7}, [%1]          \n\t"   //d6={nx,ny,nz,pad}
+
+    : "+&r"(ptr3), "+&r"(ptr1)
+    : "I"(sizeof(SPVertex)), "I"(3 * sizeof(SPVertex))
+    : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
+      "d16","d17", "d18","d19", "d20", "d21", "d22",
+      "d23", "d24", "d25", "d26", "d27", "d28", "d29",
+      "d30", "d31", "memory"
+       );
+    // %0 = tmp, %1 = i (light counter), %2 = ptr0 (lights), %3 = ptr1 (not
+    // referenced by the code below), %4 = ptr2 (result), %5 = sizeof(SPLight)
+    asm volatile (
+
+    "mov               %0, %5                          \n\t"   //tmp = sizeof(SPLight)
+    "mla               %0, %1, %0, %2                  \n\t"   //tmp = i*sizeof(SPLight) + ptr0 = &lights[numLights]
+
+    "vmov.f32          q8, q0                              \n\t"       //q8  = normal of vertex v
+    "vmov.f32          q9, q1                              \n\t"       //q9  = normal of vertex v+1
+    "vmov.f32          q10, q2                             \n\t"       //q10 = normal of vertex v+2
+    "vmov.f32          q11, q3                             \n\t"       //q11 = normal of vertex v+3
+
+    "vld1.32           {d0}, [%0]                          \n\t"       //d0={r,g} of lights[numLights]
+    "flds              s2, [%0, #8]                    \n\t"   //d1[0]={b}
+    "vmov.f32          q1, q0                              \n\t"       //q1=q0
+    "vmov.f32          q2, q0                              \n\t"       //q2=q0
+    "vmov.f32          q3, q0                              \n\t"       //q3=q0
+
+    "vmov.f32          q15, #0.0                       \n\t"   //q15=0
+    "vdup.f32          q15, d30[0]                     \n\t"   //q15=d30[0]
+
+    "cmp               %1, #0                          \n\t"   //
+    "beq               2f                              \n\t"   //(i==0) goto 2
+
+    "1:                                                \n\t"   //loop over the lights
+    "vld1.32           {d8}, [%2]!                     \n\t"   //d8={r,g}, ptr0 += 8
+    "flds              s18, [%2]               \n\t"   //d9[0]={b}
+    "add               %2, %2, #4              \n\t"   //ptr0 += 4 (past b)
+    "vld1.32           {d10}, [%2]!                    \n\t"   //d10={x,y}, ptr0 += 8
+    "flds              s22, [%2]               \n\t"   //d11[0]={z}
+    "add               %2, %2, #4              \n\t"   //ptr0 += 4 (past z, now at the next light)
+
+    "vmov.f32          q13, q5                         \n\t"   //q13 = q5 (light vector)
+    "vmov.f32          q12, q4                         \n\t"   //q12 = q4 (light colour)
+
+    "vmul.f32          q4, q8, q13                     \n\t"   //q4 = q8*q13
+    "vmul.f32          q5, q9, q13                     \n\t"   //q5 = q9*q13
+    "vmul.f32          q6, q10, q13            \n\t"   //q6 = q10*q13
+    "vmul.f32          q7, q11, q13            \n\t"   //q7 = q11*q13
+
+    "vpadd.f32                 d8, d8                                  \n\t"   //d8[0] = d8[0] + d8[1]
+    "vpadd.f32                 d10, d10                                \n\t"   //d10[0] = d10[0] + d10[1]
+    "vpadd.f32                 d12, d12                                \n\t"   //d12[0] = d12[0] + d12[1]
+    "vpadd.f32                 d14, d14                                \n\t"   //d14[0] = d14[0] + d14[1]
+
+    "vmov.f32          s17, s18                                \n\t"   //d8[1] = d9[0]
+    "vmov.f32          s21, s22                                \n\t"   //d10[1] = d11[0]
+    "vmov.f32          s25, s26                                \n\t"   //d12[1] = d13[0]
+    "vmov.f32          s29, s30                                \n\t"   //d14[1] = d15[0]
+
+    "vpadd.f32                 d8, d8, d10                     \n\t"   //d8 = {d8[0] + d8[1], d10[0] + d10[1]}
+    "vpadd.f32                 d9, d12, d14                    \n\t"   //d9 = {d12[0] + d12[1], d14[0] + d14[1]}
+
+    "vmax.f32          q4, q4, q15                     \n\t"   //q4=max(q4, 0), the four clamped dot products
+
+    "vmla.f32          q0, q12, d8[0]                  \n\t"   //q0 += q12*d8[0] (vertex v)
+    "vmla.f32          q1, q12, d8[1]                  \n\t"   //q1 += q12*d8[1] (vertex v+1)
+    "vmla.f32          q2, q12, d9[0]                  \n\t"   //q2 += q12*d9[0] (vertex v+2)
+    "vmla.f32          q3, q12, d9[1]                  \n\t"   //q3 += q12*d9[1] (vertex v+3)
+
+    "subs              %1, %1, #1                      \n\t"   //i = i - 1
+    "bne               1b                              \n\t"   //(i!=0) goto 1
+
+    "2:                                                \n\t"   //
+
+    "vmov.f32        q4, #1.0                  \n\t"   //q4 = 1.0 in every lane
+    "vmin.f32          q0, q0, q4              \n\t"   //q0 = min(q0, 1.0)
+    "vmin.f32          q1, q1, q4              \n\t"   //q1 = min(q1, 1.0)
+    "vmin.f32          q2, q2, q4              \n\t"   //q2 = min(q2, 1.0)
+    "vmin.f32          q3, q3, q4              \n\t"   //q3 = min(q3, 1.0)
+    "vst1.32           {d0, d1}, [%4]!         \n\t"   //result[0..3]   = q0
+    "vst1.32           {d2, d3}, [%4]!             \n\t"       //result[4..7]   = q1
+    "vst1.32           {d4, d5}, [%4]!         \n\t"   //result[8..11]  = q2
+    "vst1.32           {d6, d7}, [%4]              \n\t"       //result[12..15] = q3
+
+    : "+&r"(tmp), "+&r"(i), "+&r"(ptr0), "+&r"(ptr1), "+&r"(ptr2)
+    : "I"(sizeof(SPLight))
+    : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
+      "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
+      "d16","d17", "d18","d19", "d20", "d21", "d22", "d23",
+      "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31",
+      "memory", "cc"
+    );
+    // Copy r,g,b out of each group of four floats; result[3], [7], [11] and
+    // [15] are skipped.
+    OGL.triangles.vertices[v].r = result[0];
+    OGL.triangles.vertices[v].g = result[1];
+    OGL.triangles.vertices[v].b = result[2];
+    OGL.triangles.vertices[v+1].r = result[4];
+    OGL.triangles.vertices[v+1].g = result[5];
+    OGL.triangles.vertices[v+1].b = result[6];
+    OGL.triangles.vertices[v+2].r = result[8];
+    OGL.triangles.vertices[v+2].g = result[9];
+    OGL.triangles.vertices[v+2].b = result[10];
+    OGL.triangles.vertices[v+3].r = result[12];
+    OGL.triangles.vertices[v+3].g = result[13];
+    OGL.triangles.vertices[v+3].b = result[14];
+}
+
+// Adds the position {x,y,z,w} of vertex i to the positions of the four
+// consecutive vertices v .. v+3, in place.
+//
+// i is 0 unless __TRIBUFFER_OPT is defined at compile time, in which case it
+// is OGL.triangles.indexmap[0].
+// NOTE(review): gSP.h selects the triangle-buffer behaviour at run time via
+// config.tribufferOpt, while this function still tests the compile-time
+// macro -- confirm which of the two is meant to apply here.
+static void gSPBillboardVertex4NEON(u32 v)
+{
+    int i = 0;
+
+#ifdef __TRIBUFFER_OPT
+    i = OGL.triangles.indexmap[0];
+#endif
+
+    void *ptr0 = (void*)&OGL.triangles.vertices[v].x;
+    void *ptr1 = (void*)&OGL.triangles.vertices[i].x;
+    // %0 = ptr0, %1 = ptr1, %2 = sizeof(SPVertex), %3 = 3*sizeof(SPVertex)
+    asm volatile (
+
+    "vld1.32           {d0, d1}, [%0]                  \n\t"   //q0 = {x,y,z,w} of vertex v
+       "add                %0, %0, %2                      \n\t"       //ptr0 += sizeof(SPVertex)
+       "vld1.32                {d2, d3}, [%0]          \n\t"   //q1 = {x,y,z,w} of vertex v+1
+       "add                %0, %0, %2                  \n\t"   //ptr0 += sizeof(SPVertex)
+       "vld1.32                {d4, d5}, [%0]          \n\t"   //q2 = {x,y,z,w} of vertex v+2
+       "add                %0, %0, %2                  \n\t"   //ptr0 += sizeof(SPVertex)
+       "vld1.32                {d6, d7}, [%0]          \n\t"   //q3 = {x,y,z,w} of vertex v+3
+    "sub                   %0, %0, %3                          \n\t"   //ptr0 -= 3*sizeof(SPVertex), back at vertex v
+
+    "vld1.32           {d16, d17}, [%1]                \n\t"   //q8 = {x,y,z,w} of vertex i
+    "vadd.f32          q0, q0, q8                          \n\t"       //q0 += q8
+    "vadd.f32          q1, q1, q8                          \n\t"       //q1 += q8
+    "vadd.f32          q2, q2, q8                          \n\t"       //q2 += q8
+    "vadd.f32          q3, q3, q8                          \n\t"       //q3 += q8
+    "vst1.32           {d0, d1}, [%0]              \n\t"       //vertex v   = q0
+    "add                   %0, %0, %2                          \n\t"   //ptr0 += sizeof(SPVertex)
+       "vst1.32                {d2, d3}, [%0]          \n\t"   //vertex v+1 = q1
+    "add                   %0, %0, %2                      \n\t"       //ptr0 += sizeof(SPVertex)
+       "vst1.32                {d4, d5}, [%0]          \n\t"   //vertex v+2 = q2
+    "add                   %0, %0, %2                      \n\t"       //ptr0 += sizeof(SPVertex)
+       "vst1.32                {d6, d7}, [%0]          \n\t"   //vertex v+3 = q3
+    : "+&r"(ptr0), "+&r"(ptr1)
+    : "I"(sizeof(SPVertex)), "I"(3 * sizeof(SPVertex))
+    : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
+      "d16", "d17", "memory"
+    );
+}
+#endif
+
+/*
+ * Transform a vertex in place by a 4x4 matrix.
+ *
+ * The active (#else) branch computes
+ *     vtx = mtx[0]*vtx[0] + mtx[1]*vtx[1] + mtx[2]*vtx[2] + mtx[3]
+ * i.e. row 3 is added unscaled, so the incoming vtx[3] is not used as a
+ * multiplier. All four output components are written back to vtx.
+ *
+ * The "#if 0" branch is a disabled alternative that additionally requests
+ * 128-bit alignment on every load/store.
+ */
+static void gSPTransformVertexNEON(float vtx[4], float mtx[4][4])
+{
+//optimised using cycle analyser
+#if 0
+    volatile int tmp0, tmp1;
+       asm volatile (
+       "vld1.32                {d0, d1}, [%3, :128]            \n\t"   //q0 = *v
+       "add                %1, %0, #16                     \n\t"       //r1=r0+16
+       "vld1.32                {d18, d19}, [%0, :128]  \n\t"   //q9 = m
+       "add                %2, %0, #32                     \n\t"       //r2=r0+32
+       "vld1.32                {d20, d21}, [%1, :128]    \n\t" //q10 = m+4
+       "add                %0, %0, #48                     \n\t"       //r0=r0+48
+       "vld1.32                {d22, d23}, [%2, :128]          \n\t"   //q11 = m+8
+       "vld1.32                {d24, d25}, [%0, :128]          \n\t"   //q12 = m+12
+
+    "vmla.f32          q12, q9, d0[0]          \n\t"   //q12 = q12 + q9*q0[0]
+    "vmul.f32          q13, q10, d0[1]         \n\t"   //q13 = q10*q0[1]
+    "vmul.f32          q14, q11, d1[0]         \n\t"   //q14 = q11*q0[2]
+    "vadd.f32          q12, q12, q13           \n\t"   //q12 = q12 + q13
+    "vadd.f32          q12, q12, q14           \n\t"   //q12 = q12 + q14
+
+       "vst1.32                {d24, d25}, [%3, :128]  \n\t"   //*v = q12
+
+       : "+r"(mtx), "+r"(tmp0), "+r"(tmp1) : "r"(vtx)
+    : "d0", "d1", "d18","d19","d20","d21","d22","d23","d24","d25",
+       "d26", "d27", "memory"
+       );
+
+#else
+       asm volatile (
+       "vld1.32                {d0, d1}, [%1]                  \n\t"   //q0 = {x,y,z,w}
+       "vld1.32                {d18, d19}, [%0]!               \n\t"   //q9  = m row 0, advance m
+       "vld1.32                {d20, d21}, [%0]!       \n\t"   //q10 = m row 1, advance m
+       "vld1.32                {d22, d23}, [%0]!       \n\t"   //q11 = m row 2, advance m
+       "vld1.32                {d24, d25}, [%0]        \n\t"   //q12 = m row 3
+
+       "vmul.f32               q13, q9, d0[0]                  \n\t"   //q13  = q9*x
+       "vmla.f32               q13, q10, d0[1]                 \n\t"   //q13 += q10*y
+       "vmla.f32               q13, q11, d1[0]                 \n\t"   //q13 += q11*z
+       "vadd.f32               q13, q13, q12                   \n\t"   //q13 += q12 (row 3, unscaled)
+       "vst1.32                {d26, d27}, [%1]                \n\t"   //*vtx = q13
+
+       : "+r"(mtx) : "r"(vtx)
+    : "d0", "d1", "d18","d19","d20","d21","d22","d23","d24","d25",
+       "d26", "d27", "memory"
+       );
+#endif
+}
+
+static void gSPLightVertexNEON(u32 v)
+{
+    volatile float result[4];
+
+    volatile int tmp = 0;
+    volatile int i = gSP.numLights;
+    volatile void *ptr0 = &gSP.lights[0].r;
+    volatile void *ptr1 = &OGL.triangles.vertices[v].nx;
+    volatile void *ptr2 = result;;
+    volatile void *ptr3 = gSP.matrix.modelView[gSP.matrix.modelViewi];
+
+       asm volatile (
+       "vld1.32                {d0, d1}, [%1]                  \n\t"   //Q0 = v
+       "vld1.32                {d18, d19}, [%0]!               \n\t"   //Q1 = m
+       "vld1.32                {d20, d21}, [%0]!           \n\t"       //Q2 = m+4
+       "vld1.32                {d22, d23}, [%0]            \n\t"       //Q3 = m+8
+
+       "vmul.f32               q2, q9, d0[0]                   \n\t"   //q2 = q9*Q0[0]
+       "vmla.f32               q2, q10, d0[1]                  \n\t"   //Q5 += Q1*Q0[1]
+       "vmla.f32               q2, q11, d1[0]                  \n\t"   //Q5 += Q2*Q0[2]
+
+    "vmul.f32          d0, d4, d4                              \n\t"   //d0 = d0*d0
+       "vpadd.f32              d0, d0, d0                              \n\t"   //d0 = d[0] + d[1]
+    "vmla.f32          d0, d5, d5                              \n\t"   //d0 = d0 + d5*d5
+
+       "vmov.f32               d1, d0                                  \n\t"   //d1 = d0
+       "vrsqrte.f32    d0, d0                                  \n\t"   //d0 = ~ 1.0 / sqrt(d0)
+       "vmul.f32               d2, d0, d1                              \n\t"   //d2 = d0 * d1
+       "vrsqrts.f32    d3, d2, d0                              \n\t"   //d3 = (3 - d0 * d2) / 2
+       "vmul.f32               d0, d0, d3                              \n\t"   //d0 = d0 * d3
+       "vmul.f32               d2, d0, d1                              \n\t"   //d2 = d0 * d1
+       "vrsqrts.f32    d3, d2, d0                              \n\t"   //d3 = (3 - d0 * d3) / 2
+       "vmul.f32               d0, d0, d3                              \n\t"   //d0 = d0 * d4
+
+       "vmul.f32               q1, q2, d0[0]                   \n\t"   //q1 = d2*d4
+
+       "vst1.32                {d2, d3}, [%1]              \n\t"       //d0={nx,ny,nz,pad}
+
+       : "+&r"(ptr3): "r"(ptr1)
+    : "d0","d1","d2","d3","d18","d19","d20","d21","d22", "d23", "memory"
+       );
+
+    asm volatile (
+    "mov               %0, #24                         \n\t"   //r0=24
+    "mla               %0, %1, %0, %2                  \n\t"   //r0=r1*r0+r2
+
+    "vld1.32           {d0}, [%0]!                     \n\t"   //d0={r,g}
+    "flds              s2, [%0]                        \n\t"   //d1[0]={b}
+    "cmp            %0, #0                             \n\t"   //
+    "beq            2f                         \n\t"   //(r1==0) goto 2
+
+    "1:                                                \n\t"   //
+    "vld1.32           {d4}, [%2]!                     \n\t"   //d4={r,g}
+    "flds              s10, [%2]                       \n\t"   //q5[0]={b}
+    "add                   %2, %2, #4                  \n\t"   //r2+=4
+    "vld1.32           {d6}, [%2]!                     \n\t"   //d6={x,y}
+    "flds              s14, [%2]                       \n\t"   //d7[0]={z}
+    "add                   %2, %2, #4                  \n\t"   //r2+=4
+    "vmul.f32          d6, d2, d6                          \n\t"       //d6=d2*d6
+    "vpadd.f32                 d6, d6                              \n\t"       //d6=d6[0]+d6[1]
+    "vmla.f32          d6, d3, d7                          \n\t"       //d6=d6+d3*d7
+    "vmov.f32          d7, #0.0                        \n\t"   //d7=0
+    "vmax.f32          d6, d6, d7                  \n\t"       //d6=max(d6, d7)
+    "vmla.f32          q0, q2, d6[0]               \n\t"       //q0=q0+q2*d6[0]
+    "sub                   %1, %1, #1                  \n\t"   //r0=r0-1
+    "cmp                   %1, #0                      \n\t"   //r0=r0-1
+    "bgt                   1b                          \n\t"   //(r1!=0) ? goto 1
+    "b                     2f                          \n\t"   //(r1!=0) ? goto 1
+    "2:                                                \n\t"   //
+    "vmov.f32        q1, #1.0                  \n\t"   //
+    "vmin.f32        q0, q0, q1                        \n\t"   //
+    "vst1.32        {d0, d1}, [%3]             \n\t"   //
+
+    : "+&r"(tmp), "+&r"(i), "+&r"(ptr0), "+&r"(ptr2)
+    :: "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
+      "d16", "memory", "cc"
+    );
+    OGL.triangles.vertices[v].r = result[0];
+    OGL.triangles.vertices[v].g = result[1];
+    OGL.triangles.vertices[v].b = result[2];
+}
+
+/*
+ * Billboard one vertex: add the 16 bytes starting at the .x field of vertex i
+ * to the 16 bytes starting at the .x field of vertex v, storing the sum back
+ * into vertex v.
+ */
+static void gSPBillboardVertexNEON(u32 v, u32 i)
+{
+    asm volatile (
+    "vld1.32           {d2, d3}, [%0]                  \n\t"   //q1={x0,y0, z0, w0}
+    "vld1.32           {d4, d5}, [%1]                  \n\t"   //q2={x1,y1, z1, w1}
+    "vadd.f32          q1, q1, q2                          \n\t"       //q1=q1+q2
+    "vst1.32           {d2, d3}, [%0]              \n\t"       //vertex v = q1
+    :: "r"(&OGL.triangles.vertices[v].x), "r"(&OGL.triangles.vertices[i].x)
+    : "d2", "d3", "d4", "d5", "memory"
+    );
+}
+
+/*
+ * Route the gSP vertex-processing hooks to the NEON implementations defined
+ * in this file.
+ */
+void gSPInitNeon()
+{
+    /* Single-vertex routines are installed unconditionally. */
+    gSPBillboardVertex = gSPBillboardVertexNEON;
+    gSPLightVertex = gSPLightVertexNEON;
+    gSPTransformVertex = gSPTransformVertexNEON;
+
+#ifdef __VEC4_OPT
+    /* Four-vertex batch routines only exist in __VEC4_OPT builds. */
+    gSPBillboardVertex4 = gSPBillboardVertex4NEON;
+    gSPLightVertex4 = gSPLightVertex4NEON;
+    gSPTransformNormal4 = gSPTransformNormal4NEON;
+    gSPTransformVertex4 = gSPTransformVertex4NEON;
+#endif
+}
diff --git a/source/gles2n64/src/gles2N64.cpp b/source/gles2n64/src/gles2N64.cpp
new file mode 100755 (executable)
index 0000000..69ec8ed
--- /dev/null
@@ -0,0 +1,343 @@
+
+#include <dlfcn.h>
+#include <string.h>
+//#include <cpu-features.h>
+
+#include "m64p_types.h"
+#include "m64p_plugin.h"
+
+#include "gles2N64.h"
+#include "Debug.h"
+#include "OpenGL.h"
+#include "N64.h"
+#include "RSP.h"
+#include "RDP.h"
+#include "VI.h"
+#include "Config.h"
+#include "Textures.h"
+#include "ShaderCombiner.h"
+#include "3DMath.h"
+#include "FrameSkipper.h"
+#include "ticks.h"
+
+//#include "ae_bridge.h"
+
+// Core function pointer; looked up with dlsym() in PluginStartup.
+ptr_ConfigGetSharedDataFilepath ConfigGetSharedDataFilepath = NULL;
+
+// Frame-skip policy shared by InitiateGFX, ProcessDList, UpdateScreen,
+// RomOpen, RomResumed and SetFrameSkipping.
+static FrameSkipper frameSkipper;
+
+// Not referenced anywhere else in this file.
+u32         last_good_ucode = (u32) -1;
+// Core callback; assigned from Gfx_Info.CheckInterrupts in InitiateGFX.
+void        (*CheckInterrupts)( void );
+// Optional callback installed through SetRenderingCallback.
+void        (*renderCallback)() = NULL;
+
+extern "C" {
+
+/*
+ * Plugin start-up hook.
+ *
+ * Resolves ConfigGetSharedDataFilepath from the core library handle and, in
+ * __NEON_OPT builds, installs the NEON math and gSP routines. Context and
+ * DebugCallback are accepted but not used. Always returns M64ERR_SUCCESS.
+ */
+EXPORT m64p_error CALL PluginStartup(m64p_dynlib_handle CoreLibHandle,
+        void *Context, void (*DebugCallback)(void *, int, const char *))
+{
+    printf("GLES2N64 Plugin StartUp\n");
+
+    void *sharedPathSym = dlsym(CoreLibHandle, "ConfigGetSharedDataFilepath");
+    ConfigGetSharedDataFilepath = (ptr_ConfigGetSharedDataFilepath) sharedPathSym;
+
+#ifdef __NEON_OPT
+    // The Android run-time CPU feature probe is not used in this port, so
+    // the NEON paths are enabled whenever they are compiled in.
+    MathInitNeon();
+    gSPInitNeon();
+#endif
+
+    return M64ERR_SUCCESS;
+}
+
+/*
+ * Plugin shut-down hook. Nothing is torn down here; the function only has to
+ * report a status. Falling off the end of a non-void function is undefined
+ * behaviour, so a value is returned explicitly.
+ */
+EXPORT m64p_error CALL PluginShutdown(void)
+{
+    return M64ERR_SUCCESS;
+}
+
+EXPORT m64p_error CALL PluginGetVersion(m64p_plugin_type *PluginType,
+        int *PluginVersion, int *APIVersion, const char **PluginNamePtr,
+        int *Capabilities)
+{
+    /* set version info */
+    if (PluginType != NULL)
+        *PluginType = M64PLUGIN_GFX;
+
+    if (PluginVersion != NULL)
+        *PluginVersion = PLUGIN_VERSION;
+
+    if (APIVersion != NULL)
+        *APIVersion = PLUGIN_API_VERSION;
+    
+    if (PluginNamePtr != NULL)
+        *PluginNamePtr = PLUGIN_NAME;
+
+    if (Capabilities != NULL)
+    {
+        *Capabilities = 0;
+    }
+                    
+    return M64ERR_SUCCESS;
+}
+
+// Exported plugin entry point; intentionally empty in this plugin.
+EXPORT void CALL ChangeWindow (void)
+{
+}
+
+// Exported plugin entry point; xpos/ypos are ignored (empty implementation).
+EXPORT void CALL MoveScreen (int xpos, int ypos)
+{
+}
+
+EXPORT int CALL InitiateGFX (GFX_INFO Gfx_Info)
+{
+printf("InitateGFX\n");
+    DMEM = Gfx_Info.DMEM;
+    IMEM = Gfx_Info.IMEM;
+    RDRAM = Gfx_Info.RDRAM;
+
+    REG.MI_INTR = (u32*) Gfx_Info.MI_INTR_REG;
+    REG.DPC_START = (u32*) Gfx_Info.DPC_START_REG;
+    REG.DPC_END = (u32*) Gfx_Info.DPC_END_REG;
+    REG.DPC_CURRENT = (u32*) Gfx_Info.DPC_CURRENT_REG;
+    REG.DPC_STATUS = (u32*) Gfx_Info.DPC_STATUS_REG;
+    REG.DPC_CLOCK = (u32*) Gfx_Info.DPC_CLOCK_REG;
+    REG.DPC_BUFBUSY = (u32*) Gfx_Info.DPC_BUFBUSY_REG;
+    REG.DPC_PIPEBUSY = (u32*) Gfx_Info.DPC_PIPEBUSY_REG;
+    REG.DPC_TMEM = (u32*) Gfx_Info.DPC_TMEM_REG;
+
+    REG.VI_STATUS = (u32*) Gfx_Info.VI_STATUS_REG;
+    REG.VI_ORIGIN = (u32*) Gfx_Info.VI_ORIGIN_REG;
+    REG.VI_WIDTH = (u32*) Gfx_Info.VI_WIDTH_REG;
+    REG.VI_INTR = (u32*) Gfx_Info.VI_INTR_REG;
+    REG.VI_V_CURRENT_LINE = (u32*) Gfx_Info.VI_V_CURRENT_LINE_REG;
+    REG.VI_TIMING = (u32*) Gfx_Info.VI_TIMING_REG;
+    REG.VI_V_SYNC = (u32*) Gfx_Info.VI_V_SYNC_REG;
+    REG.VI_H_SYNC = (u32*) Gfx_Info.VI_H_SYNC_REG;
+    REG.VI_LEAP = (u32*) Gfx_Info.VI_LEAP_REG;
+    REG.VI_H_START = (u32*) Gfx_Info.VI_H_START_REG;
+    REG.VI_V_START = (u32*) Gfx_Info.VI_V_START_REG;
+    REG.VI_V_BURST = (u32*) Gfx_Info.VI_V_BURST_REG;
+    REG.VI_X_SCALE = (u32*) Gfx_Info.VI_X_SCALE_REG;
+    REG.VI_Y_SCALE = (u32*) Gfx_Info.VI_Y_SCALE_REG;
+
+    CheckInterrupts = Gfx_Info.CheckInterrupts;
+
+    Config_LoadConfig();
+    Config_LoadRomConfig(Gfx_Info.HEADER);
+
+    ticksInitialize();
+    if( config.autoFrameSkip )
+        frameSkipper.setSkips( FrameSkipper::AUTO, config.maxFrameSkip );
+    else
+        frameSkipper.setSkips( FrameSkipper::MANUAL, config.maxFrameSkip );
+
+       OGL_Start();
+
+    return 1;
+}
+
+EXPORT void CALL ProcessDList(void)
+{
+    OGL.frame_dl++;
+
+    if (frameSkipper.willSkipNext())
+    {
+        OGL.frameSkipped++;
+        RSP.busy = FALSE;
+        RSP.DList++;
+
+        /* avoid hang on frameskip */
+        *REG.MI_INTR |= MI_INTR_DP;
+        CheckInterrupts();
+        *REG.MI_INTR |= MI_INTR_SP;
+        CheckInterrupts();
+        return;
+    }
+
+    OGL.consecutiveSkips = 0;
+    RSP_ProcessDList();
+    OGL.mustRenderDlist = true;
+}
+
+// Exported plugin entry point; intentionally empty in this plugin.
+EXPORT void CALL ProcessRDPList(void)
+{
+}
+
+// Exported plugin entry point; Width/Height are ignored (empty implementation).
+EXPORT void CALL ResizeVideoOutput(int Width, int Height)
+{
+}
+
+// Called when the ROM is closed; stops the OpenGL side via OGL_Stop().
+EXPORT void CALL RomClosed (void)
+{
+    OGL_Stop();  // paulscode, OGL_Stop missing from Yongzh's code
+}
+
+/*
+ * Prepare for a newly opened ROM: initialise the RSP, reset the per-frame
+ * counters and set the frame skipper's target rate (50 when config.romPAL is
+ * set, otherwise 60). Always returns 1.
+ */
+EXPORT int CALL RomOpen (void)
+{
+    RSP_Init();
+
+    // Start from a clean frame-tracking state.
+    OGL.mustRenderDlist = false;
+    OGL.frame_prevdl = -1;
+    OGL.frame_dl = 0;
+    OGL.frame_vsync = 0;
+
+    if (config.romPAL)
+        frameSkipper.setTargetFPS(50);
+    else
+        frameSkipper.setTargetFPS(60);
+
+    return 1;
+}
+
+// Called when emulation resumes; calls frameSkipper.start().
+EXPORT void CALL RomResumed(void)
+{
+    frameSkipper.start();
+}
+
+// Exported plugin entry point; intentionally empty in this plugin.
+EXPORT void CALL ShowCFB (void)
+{
+}
+
+/*
+ * Per-VI screen update.
+ *
+ * Always advances the frame skipper. If at least one display list arrived
+ * since the previous call, the vsync counter is bumped and, when the last
+ * list was actually processed (not skipped), the frame is presented through
+ * VI_UpdateScreen().
+ */
+EXPORT void CALL UpdateScreen (void)
+{
+    frameSkipper.update();
+
+    // Nothing new since the last update: leave the screen alone.
+    const bool sawNewDlist = (OGL.frame_prevdl != OGL.frame_dl);
+    if (!sawNewDlist)
+        return;
+
+    OGL.frame_prevdl = OGL.frame_dl;
+
+    if (OGL.frame_dl > 0)
+        OGL.frame_vsync++;
+
+    if (!OGL.mustRenderDlist)
+        return;
+
+    OGL.screenUpdate=true;
+    VI_UpdateScreen();
+    OGL.mustRenderDlist = false;
+}
+
+// Exported plugin entry point; intentionally empty in this plugin.
+EXPORT void CALL ViStatusChanged (void)
+{
+}
+
+// Exported plugin entry point; intentionally empty in this plugin.
+EXPORT void CALL ViWidthChanged (void)
+{
+}
+
+/******************************************************************
+  Function: FrameBufferRead
+  Purpose:  This function is called to notify the dll that the
+            frame buffer memory is being read at the given address.
+            DLL should copy content from its render buffer to the frame buffer
+            in N64 RDRAM
+            DLL is responsible to maintain its own frame buffer memory addr list
+            DLL should copy 4KB block content back to RDRAM frame buffer.
+            Emulator should not call this function again if other memory
+            is read within the same 4KB range
+
+            Since depth buffer is also being watched, the reported addr
+            may belong to depth buffer
+  input:    addr        rdram address
+            val         val
+            size        1 = uint8, 2 = uint16, 4 = uint32
+  output:   none
+
+  Note:     the implementation below takes only addr and is an empty
+            stub; nothing is copied back to RDRAM.
+*******************************************************************/ 
+
+EXPORT void CALL FBRead(u32 addr)
+{
+}
+
+/******************************************************************
+  Function: FrameBufferWrite
+  Purpose:  This function is called to notify the dll that the
+            frame buffer has been modified by CPU at the given address.
+
+            Since depth buffer is also being watched, the reported addr
+            may belong to depth buffer
+
+  input:    addr        rdram address
+            val         val
+            size        1 = uint8, 2 = uint16, 4 = uint32
+  output:   none
+
+  Note:     the implementation below takes addr and size only and is an
+            empty stub; the notification is ignored.
+*******************************************************************/ 
+
+EXPORT void CALL FBWrite(u32 addr, u32 size)
+{
+}
+
+/************************************************************************
+Function: FBGetFrameBufferInfo
+Purpose:  This function is called by the emulator core to retrieve frame
+          buffer information from the video plugin in order to be able
+          to notify the video plugin about CPU frame buffer read/write
+          operations
+
+          size:
+            = 1     byte
+            = 2     word (16 bit) <-- this is N64 default depth buffer format
+            = 4     dword (32 bit)
+
+          when frame buffer information is not available yet, set all values
+          in the FrameBufferInfo structure to 0
+
+input:    FrameBufferInfo pinfo[6]
+          pinfo points to a FrameBufferInfo structure which is to be
+          filled in by this function
+output:   Values are returned in the FrameBufferInfo structure
+          Plugin can return up to 6 frame buffer info
+
+Note:     the implementation below is an empty stub; p is left untouched.
+ ************************************************************************/
+
+EXPORT void CALL FBGetFrameBufferInfo(void *p)
+{
+}
+
+// paulscode, API changed this to "ReadScreen2" in Mupen64Plus 1.99.4
+// Forwards dest/width/height to OGL_ReadScreen(); the 'front' argument is
+// currently ignored.
+EXPORT void CALL ReadScreen2(void *dest, int *width, int *height, int front)
+{
+/* TODO: 'int front' was added in 1.99.4.  What to do with this here? */
+    OGL_ReadScreen(dest, width, height);
+}
+
+// Stores the given callback in the global renderCallback pointer.
+EXPORT void CALL SetRenderingCallback(void (*callback)())
+{
+    renderCallback = callback;
+}
+
+// Reconfigure the frame skipper: automatic or manual mode, with maxSkips as
+// the skip limit passed through to FrameSkipper::setSkips().
+EXPORT void CALL SetFrameSkipping(bool autoSkip, int maxSkips)
+{
+    if (autoSkip)
+        frameSkipper.setSkips(FrameSkipper::AUTO, maxSkips);
+    else
+        frameSkipper.setSkips(FrameSkipper::MANUAL, maxSkips);
+}
+
+// Records the stretch preference in config.stretchVideo (read by ResizeGL).
+EXPORT void CALL SetStretchVideo(bool stretch)
+{
+    config.stretchVideo = stretch;
+}
+
+// Thin exported wrapper around OGL_Start().
+EXPORT void CALL StartGL()
+{
+    OGL_Start();
+}
+
+// Thin exported wrapper around OGL_Stop().
+EXPORT void CALL StopGL()
+{
+    OGL_Stop();
+}
+
+EXPORT void CALL ResizeGL(int width, int height)
+{
+    const float ratio = (config.romPAL ? 9.0f/11.0f : 0.75f);
+    int videoWidth = width;
+    int videoHeight = height;
+
+    if (!config.stretchVideo) {
+        videoWidth = (int) (height / ratio);
+        if (videoWidth > width) {
+            videoWidth = width;
+            videoHeight = (int) (width * ratio);
+        }
+    }
+    int x = (width - videoWidth) / 2;
+    int y = (height - videoHeight) / 2;
+
+    OGL_ResizeWindow(x, y, videoWidth, videoHeight);
+}
+
+} // extern "C"
+
diff --git a/source/gles2n64/src/gles2N64.h b/source/gles2n64/src/gles2N64.h
new file mode 100644 (file)
index 0000000..044dcbe
--- /dev/null
@@ -0,0 +1,25 @@
+#ifndef GLN64_H
+#define GLN64_H
+
+#include "m64p_config.h"
+#include "stdio.h"
+
+
+/* Fallback min(); note that each argument may be evaluated twice. */
+#ifndef min
+#define min(a,b) ((a) < (b) ? (a) : (b))
+#endif
+
+//#define DEBUG
+
+/* Values reported to the core by PluginGetVersion. */
+#define PLUGIN_NAME     "gles2n64"
+#define PLUGIN_VERSION  0x000005
+#define PLUGIN_API_VERSION 0x020200
+
+/* Core config function pointer; defined in gles2N64.cpp. */
+extern ptr_ConfigGetSharedDataFilepath ConfigGetSharedDataFilepath;
+
+/* Callbacks defined in gles2N64.cpp. */
+extern void (*CheckInterrupts)( void );
+extern void (*renderCallback)();
+
+
+#endif
+
diff --git a/source/gles2n64/src/sdl2_compat.h b/source/gles2n64/src/sdl2_compat.h
new file mode 100644 (file)
index 0000000..9f77089
--- /dev/null
@@ -0,0 +1,783 @@
+/*
+  Simple DirectMedia Layer
+  Copyright (C) 1997-2012 Sam Lantinga <slouken@libsdl.org>
+
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the authors be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include <SDL_surface.h>
+
+/*
+ * Parameters of a single blit: source and destination buffers with their
+ * dimensions, pitch and skip, the pixel formats on both sides, an optional
+ * lookup table, blit flags, colour key and an r/g/b/a modulation value.
+ * NOTE(review): layout presumably mirrors SDL's internal SDL_BlitInfo --
+ * confirm against the SDL version in use.
+ */
+typedef struct
+{
+    Uint8 *src;
+    int src_w, src_h;
+    int src_pitch;
+    int src_skip;
+    Uint8 *dst;
+    int dst_w, dst_h;
+    int dst_pitch;
+    int dst_skip;
+    SDL_PixelFormat *src_fmt;
+    SDL_PixelFormat *dst_fmt;
+    Uint8 *table;   /* heap-allocated; freed by SDL_InvalidateMap */
+    int flags;
+    Uint32 colorkey;
+    Uint8 r, g, b, a;
+} SDL_BlitInfo;
+
+/* Blit mapping definition */
+typedef struct SDL_BlitMap
+{
+    SDL_Surface *dst;   /* referenced surface; SDL_InvalidateMap drops the reference */
+    int identity;
+    SDL_blit blit;
+    void *data;
+    SDL_BlitInfo info;
+
+    /* the version count matches the destination; mismatch indicates
+       an invalid mapping */
+    Uint32 dst_palette_version;
+    Uint32 src_palette_version;
+} SDL_BlitMap;
+
+/*
+ * Video capability record returned by SDL_GetVideoInfo().
+ * In this file only vfmt, current_w and current_h are ever filled in; the
+ * capability bit-fields stay at their zero-initialised values.
+ */
+typedef struct SDL_VideoInfo
+{
+    Uint32 hw_available:1;
+    Uint32 wm_available:1;
+    Uint32 UnusedBits1:6;
+    Uint32 UnusedBits2:1;
+    Uint32 blit_hw:1;
+    Uint32 blit_hw_CC:1;
+    Uint32 blit_hw_A:1;
+    Uint32 blit_sw:1;
+    Uint32 blit_sw_CC:1;
+    Uint32 blit_sw_A:1;
+    Uint32 blit_fill:1;
+    Uint32 UnusedBits3:16;
+    Uint32 video_mem;
+
+    SDL_PixelFormat *vfmt;   /* desktop pixel format */
+
+    int current_w;           /* desktop width */
+    int current_h;           /* desktop height */
+} SDL_VideoInfo;
+
+/* Video-mode flag bits used by the compatibility functions in this file
+   (e.g. SDL_FULLSCREEN in SDL_ListModes, SDL_OPENGL in SDL_ResizeVideoMode). */
+#define SDL_ANYFORMAT       0x00100000
+#define SDL_HWPALETTE       0x00200000
+#define SDL_FULLSCREEN      0x00800000
+#define SDL_RESIZABLE       0x01000000
+#define SDL_NOFRAME         0x02000000
+#define SDL_OPENGL          0x04000000
+#define SDL_HWSURFACE       0x08000001  /**< \note Not used */
+
+/* Mouse button numbers reported for wheel movement. */
+#define SDL_BUTTON_WHEELUP      4
+#define SDL_BUTTON_WHEELDOWN    5
+
+/* Non-zero once video is initialised; SDL_ListModes returns NULL while it is 0.
+   NOTE(review): this is a non-static definition inside a header, so including
+   the header from more than one translation unit would define it twice. */
+int initialized_video = 0;
+
+/* State shared by the compatibility functions below. */
+static SDL_Window *SDL_VideoWindow = NULL;      /* the single compat window */
+static SDL_Surface *SDL_WindowSurface = NULL;   /* surface obtained from the window */
+static SDL_Surface *SDL_VideoSurface = NULL;    /* view into the window surface */
+static SDL_Surface *SDL_ShadowSurface = NULL;   /* optional software copy blitted to the video surface */
+static SDL_Surface *SDL_PublicSurface = NULL;   /* surface exposed to the application */
+static SDL_Rect SDL_VideoViewport;              /* position/size of the video surface inside the window */
+static char *wm_title = NULL;                   /* owned copy of the window title */
+static Uint32 SDL_VideoFlags = 0;
+static SDL_GLContext *SDL_VideoContext = NULL;
+static SDL_Surface *SDL_VideoIcon;
+
+/*
+ * Set the window title. A private copy of `title` is kept in wm_title (the
+ * previous copy is released first); `icon` is ignored.
+ */
+static void
+SDL_WM_SetCaption(const char *title, const char *icon)
+{
+    if (wm_title) {
+        SDL_free(wm_title);
+    }
+    wm_title = title ? SDL_strdup(title) : NULL;
+
+    SDL_SetWindowTitle(SDL_VideoWindow, wm_title);
+}
+
+/*
+ * Pick the display index from the environment: SDL_VIDEO_FULLSCREEN_DISPLAY
+ * takes precedence, SDL_VIDEO_FULLSCREEN_HEAD is the fallback, and 0 is used
+ * when neither is set.
+ */
+static int
+GetVideoDisplay()
+{
+    const char *setting = SDL_getenv("SDL_VIDEO_FULLSCREEN_DISPLAY");
+
+    if (setting == NULL) {
+        setting = SDL_getenv("SDL_VIDEO_FULLSCREEN_HEAD");
+    }
+    return setting ? SDL_atoi(setting) : 0;
+}
+
+/*
+ * Return a pointer to a function-local static SDL_VideoInfo. On the first
+ * successful desktop-mode query its pixel format and current size are filled
+ * in; the allocated format is intentionally never released.
+ */
+static const SDL_VideoInfo *
+SDL_GetVideoInfo(void)
+{
+    static SDL_VideoInfo info;
+
+    /* Already populated: nothing more to do. */
+    if (info.vfmt) {
+        return &info;
+    }
+
+    SDL_DisplayMode desktop;
+    if (SDL_GetDesktopDisplayMode(GetVideoDisplay(), &desktop) == 0) {
+        /* Memory leak, compatibility code, who cares? */
+        info.vfmt = SDL_AllocFormat(desktop.format);
+        info.current_w = desktop.w;
+        info.current_h = desktop.h;
+    }
+    return &info;
+}
+
+static SDL_Rect **
+SDL_ListModes(const SDL_PixelFormat * format, Uint32 flags)
+{
+    int i, nmodes;
+    SDL_Rect **modes;
+
+    if (!initialized_video) {
+        return NULL;
+    }
+
+    if (!(flags & SDL_FULLSCREEN)) {
+        return (SDL_Rect **) (-1);
+    }
+
+    if (!format) {
+        format = SDL_GetVideoInfo()->vfmt;
+    }
+
+    /* Memory leak, but this is a compatibility function, who cares? */
+    nmodes = 0;
+    modes = NULL;
+    for (i = 0; i < SDL_GetNumDisplayModes(GetVideoDisplay()); ++i) {
+        SDL_DisplayMode mode;
+        int bpp;
+
+        SDL_GetDisplayMode(GetVideoDisplay(), i, &mode);
+        if (!mode.w || !mode.h) {
+            return (SDL_Rect **) (-1);
+        }
+
+        /* Copied from src/video/SDL_pixels.c:SDL_PixelFormatEnumToMasks */
+        if (SDL_BYTESPERPIXEL(mode.format) <= 2) {
+            bpp = SDL_BITSPERPIXEL(mode.format);
+        } else {
+            bpp = SDL_BYTESPERPIXEL(mode.format) * 8;
+        }
+
+        if (bpp != format->BitsPerPixel) {
+            continue;
+        }
+        if (nmodes > 0 && modes[nmodes - 1]->w == mode.w
+            && modes[nmodes - 1]->h == mode.h) {
+            continue;
+        }
+
+        modes = (SDL_Rect**)SDL_realloc(modes, (nmodes + 2) * sizeof(*modes));
+        if (!modes) {
+            return NULL;
+        }
+        modes[nmodes] = (SDL_Rect *) SDL_malloc(sizeof(SDL_Rect));
+        if (!modes[nmodes]) {
+            return NULL;
+        }
+        modes[nmodes]->x = 0;
+        modes[nmodes]->y = 0;
+        modes[nmodes]->w = mode.w;
+        modes[nmodes]->h = mode.h;
+        ++nmodes;
+    }
+    if (modes) {
+        modes[nmodes] = NULL;
+    }
+    return modes;
+}
+
+static void
+SDL_UpdateRects(SDL_Surface * screen, int numrects, SDL_Rect * rects)
+{
+    int i;
+
+    if (screen == SDL_ShadowSurface) {
+        for (i = 0; i < numrects; ++i) {
+            SDL_BlitSurface(SDL_ShadowSurface, &rects[i], SDL_VideoSurface,
+                            &rects[i]);
+        }
+
+        /* Fall through to video surface update */
+        screen = SDL_VideoSurface;
+    }
+    if (screen == SDL_VideoSurface) {
+        if (SDL_VideoViewport.x || SDL_VideoViewport.y) {
+            SDL_Rect *stackrects = SDL_stack_alloc(SDL_Rect, numrects);
+            SDL_Rect *stackrect;
+            const SDL_Rect *rect;
+
+            /* Offset all the rectangles before updating */
+            for (i = 0; i < numrects; ++i) {
+                rect = &rects[i];
+                stackrect = &stackrects[i];
+                stackrect->x = SDL_VideoViewport.x + rect->x;
+                stackrect->y = SDL_VideoViewport.y + rect->y;
+                stackrect->w = rect->w;
+                stackrect->h = rect->h;
+            }
+            SDL_UpdateWindowSurfaceRects(SDL_VideoWindow, stackrects, numrects);
+            SDL_stack_free(stackrects);
+        } else {
+            SDL_UpdateWindowSurfaceRects(SDL_VideoWindow, rects, numrects);
+        }
+    }
+}
+
+static void
+SDL_UpdateRect(SDL_Surface * screen, Sint32 x, Sint32 y, Uint32 w, Uint32 h)
+{
+    if (screen) {
+        SDL_Rect rect;
+
+        /* Fill the rectangle */
+        rect.x = (int) x;
+        rect.y = (int) y;
+        rect.w = (int) (w ? w : screen->w);
+        rect.h = (int) (h ? h : screen->h);
+        SDL_UpdateRects(screen, 1, &rect);
+    }
+}
+
+/* Present the whole surface by updating its full area. Always returns 0. */
+static int
+SDL_Flip(SDL_Surface * screen)
+{
+    SDL_UpdateRect(screen, 0, 0, 0, 0);   /* zero w/h selects the full surface */
+    return 0;
+}
+
+/*
+ * Calculate the pad-aligned scanline width of a surface
+ */
+static int
+SDL_CalculatePitch(SDL_Surface * surface)
+{
+    int pitch;
+
+    /* Surface should be 4-byte aligned for speed */
+    pitch = surface->w * surface->format->BytesPerPixel;
+    switch (surface->format->BitsPerPixel) {
+    case 1:
+        pitch = (pitch + 7) / 8;
+        break;
+    case 4:
+        pitch = (pitch + 1) / 2;
+        break;
+    default:
+        break;
+    }
+    pitch = (pitch + 3) & ~3;   /* 4-byte aligning */
+    return (pitch);
+}
+
+static void
+SDL_InvalidateMap(SDL_BlitMap * map)
+{
+    if (!map) {
+        return;
+    }
+    if (map->dst) {
+        /* Release our reference to the surface - see the note below */
+        if (--map->dst->refcount <= 0) {
+            SDL_FreeSurface(map->dst);
+        }
+    }
+    map->dst = NULL;
+    map->src_palette_version = 0;
+    map->dst_palette_version = 0;
+    if (map->info.table) {
+        SDL_free(map->info.table);
+        map->info.table = NULL;
+    }
+}
+
+/* Swap the OpenGL buffers of the compat window (wraps SDL_GL_SwapWindow). */
+static void
+SDL_GL_SwapBuffers(void)
+{
+    SDL_GL_SwapWindow(SDL_VideoWindow);
+}
+
+static int
+SDL_WM_ToggleFullScreen(SDL_Surface * surface)
+{
+    int length;
+    void *pixels;
+    Uint8 *src, *dst;
+    int row;
+    int window_w;
+    int window_h;
+
+    if (!SDL_PublicSurface) {
+        SDL_SetError("SDL_SetVideoMode() hasn't been called");
+        return 0;
+    }
+
+    /* Copy the old bits out */
+    length = SDL_PublicSurface->w * SDL_PublicSurface->format->BytesPerPixel;
+    pixels = SDL_malloc(SDL_PublicSurface->h * length);
+    if (pixels && SDL_PublicSurface->pixels) {
+        src = (Uint8*)SDL_PublicSurface->pixels;
+        dst = (Uint8*)pixels;
+        for (row = 0; row < SDL_PublicSurface->h; ++row) {
+            SDL_memcpy(dst, src, length);
+            src += SDL_PublicSurface->pitch;
+            dst += length;
+        }
+    }
+
+    /* Do the physical mode switch */
+    if (SDL_GetWindowFlags(SDL_VideoWindow) & SDL_WINDOW_FULLSCREEN) {
+        if (SDL_SetWindowFullscreen(SDL_VideoWindow, 0) < 0) {
+            return 0;
+        }
+        SDL_PublicSurface->flags &= ~SDL_FULLSCREEN;
+    } else {
+        if (SDL_SetWindowFullscreen(SDL_VideoWindow, 1) < 0) {
+            return 0;
+        }
+        SDL_PublicSurface->flags |= SDL_FULLSCREEN;
+    }
+
+    /* Recreate the screen surface */
+    SDL_WindowSurface = SDL_GetWindowSurface(SDL_VideoWindow);
+    if (!SDL_WindowSurface) {
+        /* We're totally hosed... */
+        return 0;
+    }
+
+    /* Center the public surface in the window surface */
+    SDL_GetWindowSize(SDL_VideoWindow, &window_w, &window_h);
+    SDL_VideoViewport.x = (window_w - SDL_VideoSurface->w)/2;
+    SDL_VideoViewport.y = (window_h - SDL_VideoSurface->h)/2;
+    SDL_VideoViewport.w = SDL_VideoSurface->w;
+    SDL_VideoViewport.h = SDL_VideoSurface->h;
+
+    /* Do some shuffling behind the application's back if format changes */
+    if (SDL_VideoSurface->format->format != SDL_WindowSurface->format->format) {
+        if (SDL_ShadowSurface) {
+            if (SDL_ShadowSurface->format->format == SDL_WindowSurface->format->format) {
+                /* Whee!  We don't need a shadow surface anymore! */
+                SDL_VideoSurface->flags &= ~SDL_DONTFREE;
+                SDL_FreeSurface(SDL_VideoSurface);
+                SDL_free(SDL_ShadowSurface->pixels);
+                SDL_VideoSurface = SDL_ShadowSurface;
+                SDL_VideoSurface->flags |= SDL_PREALLOC;
+                SDL_ShadowSurface = NULL;
+            } else {
+                /* No problem, just change the video surface format */
+                SDL_FreeFormat(SDL_VideoSurface->format);
+                SDL_VideoSurface->format = SDL_WindowSurface->format;
+                SDL_VideoSurface->format->refcount++;
+                SDL_InvalidateMap(SDL_ShadowSurface->map);
+            }
+        } else {
+            /* We can make the video surface the shadow surface */
+            SDL_ShadowSurface = SDL_VideoSurface;
+            SDL_ShadowSurface->pitch = SDL_CalculatePitch(SDL_ShadowSurface);
+            SDL_ShadowSurface->pixels = SDL_malloc(SDL_ShadowSurface->h * SDL_ShadowSurface->pitch);
+            if (!SDL_ShadowSurface->pixels) {
+                /* Uh oh, we're hosed */
+                SDL_ShadowSurface = NULL;
+                return 0;
+            }
+            SDL_ShadowSurface->flags &= ~SDL_PREALLOC;
+
+            SDL_VideoSurface = SDL_CreateRGBSurfaceFrom(NULL, 0, 0, 32, 0, 0, 0, 0, 0);
+            SDL_VideoSurface->flags = SDL_ShadowSurface->flags;
+            SDL_VideoSurface->flags |= SDL_PREALLOC;
+            SDL_FreeFormat(SDL_VideoSurface->format);
+            SDL_VideoSurface->format = SDL_WindowSurface->format;
+            SDL_VideoSurface->format->refcount++;
+            SDL_VideoSurface->w = SDL_ShadowSurface->w;
+            SDL_VideoSurface->h = SDL_ShadowSurface->h;
+        }
+    }
+
+    /* Update the video surface */
+    SDL_VideoSurface->pitch = SDL_WindowSurface->pitch;
+    SDL_VideoSurface->pixels = (void *)((Uint8 *)SDL_WindowSurface->pixels +
+        SDL_VideoViewport.y * SDL_VideoSurface->pitch +
+        SDL_VideoViewport.x  * SDL_VideoSurface->format->BytesPerPixel);
+    SDL_SetClipRect(SDL_VideoSurface, NULL);
+
+    /* Copy the old bits back */
+    if (pixels) {
+        src = (Uint8*)pixels;
+        dst = (Uint8*)SDL_PublicSurface->pixels;
+        for (row = 0; row < SDL_PublicSurface->h; ++row) {
+            SDL_memcpy(dst, src, length);
+            src += length;
+            dst += SDL_PublicSurface->pitch;
+        }
+        SDL_Flip(SDL_PublicSurface);
+        SDL_free(pixels);
+    }
+
+    /* We're done! */
+    return 1;
+}
+
+/*
+ * Blank the display: fill the shadow surface (if any) with black, fill the
+ * window surface with 0 and push the window surface to the screen.
+ */
+static void
+ClearVideoSurface()
+{
+    if (SDL_ShadowSurface) {
+        const Uint32 black = SDL_MapRGB(SDL_ShadowSurface->format, 0, 0, 0);
+        SDL_FillRect(SDL_ShadowSurface, NULL, black);
+    }
+
+    SDL_FillRect(SDL_WindowSurface, NULL, 0);
+    SDL_UpdateWindowSurface(SDL_VideoWindow);
+}
+
+/*
+ * Try to satisfy a video mode request by resizing the existing window and
+ * surfaces in place instead of destroying and recreating them.
+ *
+ * width, height  requested size of the video surface
+ * bpp            requested bits per pixel; must match the current video surface
+ * flags          requested SDL 1.2-style flags; must equal SDL_VideoFlags
+ *
+ * Returns 0 when the resize was done in place (SDL_PublicSurface is then
+ * usable by the caller), or -1 when an in-place resize is not possible and
+ * the caller has to rebuild the video mode from scratch.
+ */
+static int
+SDL_ResizeVideoMode(int width, int height, int bpp, Uint32 flags)
+{
+    int w, h;
+
+    /* We can't resize something we don't have... */
+    if (!SDL_VideoSurface) {
+        return -1;
+    }
+
+    /* We probably have to recreate the window in fullscreen mode */
+    if (flags & SDL_FULLSCREEN) {
+        return -1;
+    }
+
+    /* I don't think there's any change we can gracefully make in flags */
+    if (flags != SDL_VideoFlags) {
+        return -1;
+    }
+    if (bpp != SDL_VideoSurface->format->BitsPerPixel) {
+        return -1;
+    }
+
+    /* Resize the window */
+    SDL_GetWindowSize(SDL_VideoWindow, &w, &h);
+    if (w != width || h != height) {
+        SDL_SetWindowSize(SDL_VideoWindow, width, height);
+    }
+
+    /* If we're in OpenGL mode, just resize the stub surface and we're done! */
+    if (flags & SDL_OPENGL) {
+        SDL_VideoSurface->w = width;
+        SDL_VideoSurface->h = height;
+        return 0;
+    }
+
+    SDL_WindowSurface = SDL_GetWindowSurface(SDL_VideoWindow);
+    if (!SDL_WindowSurface) {
+        return -1;
+    }
+    /* The video surface shares the window surface's format object */
+    if (SDL_VideoSurface->format != SDL_WindowSurface->format) {
+        return -1;
+    }
+    SDL_VideoSurface->w = width;
+    SDL_VideoSurface->h = height;
+    SDL_VideoSurface->pixels = SDL_WindowSurface->pixels;
+    SDL_VideoSurface->pitch = SDL_WindowSurface->pitch;
+    SDL_SetClipRect(SDL_VideoSurface, NULL);
+
+    if (SDL_ShadowSurface) {
+        const int old_w = SDL_ShadowSurface->w;
+        const int old_h = SDL_ShadowSurface->h;
+        const int old_pitch = SDL_ShadowSurface->pitch;
+        size_t new_size;
+        void *new_pixels;
+
+        /*
+         * SDL_CalculatePitch() is handed the surface itself, so the new
+         * size is stored before calling it (same order as before).
+         */
+        SDL_ShadowSurface->w = width;
+        SDL_ShadowSurface->h = height;
+        SDL_ShadowSurface->pitch = SDL_CalculatePitch(SDL_ShadowSurface);
+
+        /*
+         * Reallocate through a temporary: on failure the old buffer is
+         * still owned by the surface, so put the old geometry back and let
+         * the caller fall back to recreating the mode instead of leaking
+         * the buffer and drawing into a NULL pixel pointer.
+         */
+        new_size = (size_t)SDL_ShadowSurface->h * (size_t)SDL_ShadowSurface->pitch;
+        new_pixels = SDL_realloc(SDL_ShadowSurface->pixels, new_size);
+        if (!new_pixels && new_size > 0) {
+            SDL_ShadowSurface->w = old_w;
+            SDL_ShadowSurface->h = old_h;
+            SDL_ShadowSurface->pitch = old_pitch;
+            return -1;
+        }
+        SDL_ShadowSurface->pixels = new_pixels;
+        SDL_SetClipRect(SDL_ShadowSurface, NULL);
+        SDL_InvalidateMap(SDL_ShadowSurface->map);
+    } else {
+        SDL_PublicSurface = SDL_VideoSurface;
+    }
+
+    ClearVideoSurface();
+
+    return 0;
+}
+
+/*
+ * Event filter that adapts new-style events for SDL 1.2-style consumers:
+ *  - a window close request additionally queues an SDL_QUIT event,
+ *  - mouse motion/button coordinates are made relative to SDL_VideoViewport,
+ *  - vertical mouse wheel motion additionally queues a press/release pair
+ *    of SDL_BUTTON_WHEELUP / SDL_BUTTON_WHEELDOWN button events.
+ *
+ * userdata  unused
+ * event     the event being filtered; may be modified in place
+ *
+ * Always returns 1.
+ */
+static int
+SDL_CompatEventFilter(void *userdata, SDL_Event * event)
+{
+    SDL_Event fake;
+
+    (void)userdata;
+
+    /*
+     * Start from a fully zeroed event so that fields this function does
+     * not set explicitly never carry stack garbage into the queue.
+     */
+    SDL_memset(&fake, 0, sizeof(fake));
+
+    switch (event->type) {
+    case SDL_WINDOWEVENT:
+        switch (event->window.event) {
+        case SDL_WINDOWEVENT_CLOSE:
+            fake.type = SDL_QUIT;
+            SDL_PushEvent(&fake);
+            break;
+        default:
+            break;
+        }
+        /* Explicit break: do not rely on falling into the next case */
+        break;
+    case SDL_TEXTINPUT:
+        {
+            /* FIXME: Generate an old style key repeat event if needed */
+            break;
+        }
+    case SDL_MOUSEMOTION:
+        {
+            event->motion.x -= SDL_VideoViewport.x;
+            event->motion.y -= SDL_VideoViewport.y;
+            break;
+        }
+    case SDL_MOUSEBUTTONDOWN:
+    case SDL_MOUSEBUTTONUP:
+        {
+            event->button.x -= SDL_VideoViewport.x;
+            event->button.y -= SDL_VideoViewport.y;
+            break;
+        }
+    case SDL_MOUSEWHEEL:
+        {
+            Uint8 button;
+            int x, y;
+
+            /* Only vertical wheel motion is translated */
+            if (event->wheel.y == 0) {
+                break;
+            }
+
+            SDL_GetMouseState(&x, &y);
+
+            if (event->wheel.y > 0) {
+                button = SDL_BUTTON_WHEELUP;
+            } else {
+                button = SDL_BUTTON_WHEELDOWN;
+            }
+
+            fake.button.button = button;
+            fake.button.x = x;
+            fake.button.y = y;
+            fake.button.windowID = event->wheel.windowID;
+
+            /* Queue a press immediately followed by a release */
+            fake.type = SDL_MOUSEBUTTONDOWN;
+            fake.button.state = SDL_PRESSED;
+            SDL_PushEvent(&fake);
+
+            fake.type = SDL_MOUSEBUTTONUP;
+            fake.button.state = SDL_RELEASED;
+            SDL_PushEvent(&fake);
+            break;
+        }
+    default:
+        break;
+    }
+    return 1;
+}
+
+/*
+ * Apply the window placement requested through the environment.
+ *
+ * SDL_VIDEO_WINDOW_POS may hold "X,Y" (explicit position) or "center";
+ * SDL_VIDEO_CENTERED, when set to anything, also requests centering.
+ *
+ * w, h  unused
+ * x, y  in/out: overwritten only when the environment supplies a complete
+ *       position or requests centering; otherwise left untouched
+ */
+static void
+GetEnvironmentWindowPosition(int w, int h, int *x, int *y)
+{
+    int display = GetVideoDisplay();
+    const char *window = SDL_getenv("SDL_VIDEO_WINDOW_POS");
+    const char *center = SDL_getenv("SDL_VIDEO_CENTERED");
+
+    (void)w;
+    (void)h;
+
+    if (window) {
+        int pos_x, pos_y;
+
+        /*
+         * Parse into temporaries so that a malformed value such as "100"
+         * cannot overwrite *x while leaving *y at its previous value.
+         */
+        if (SDL_sscanf(window, "%d,%d", &pos_x, &pos_y) == 2) {
+            *x = pos_x;
+            *y = pos_y;
+            return;
+        }
+        if (SDL_strcmp(window, "center") == 0) {
+            center = window;
+        }
+    }
+    if (center) {
+        *x = SDL_WINDOWPOS_CENTERED_DISPLAY(display);
+        *y = SDL_WINDOWPOS_CENTERED_DISPLAY(display);
+    }
+}
+
+/*
+ * SDL 1.2-style video mode setup implemented on top of the window API.
+ *
+ * width, height  requested surface size; 0 means "use the desktop size"
+ * bpp            requested bits per pixel; 0 means "use the desktop depth"
+ * flags          SDL 1.2-style flags (SDL_FULLSCREEN, SDL_OPENGL,
+ *                SDL_RESIZABLE, SDL_NOFRAME, SDL_ANYFORMAT are examined)
+ *
+ * First tries to resize the existing mode in place; otherwise destroys the
+ * current surfaces/context/window and creates new ones.  In OpenGL mode a
+ * GL context plus a pixel-less stub surface is created; otherwise the video
+ * surface aliases the window surface and a shadow surface is added when the
+ * requested depth differs from the window's (unless SDL_ANYFORMAT is set).
+ *
+ * Returns the public surface, or NULL on failure.
+ */
+static SDL_Surface *
+SDL_SetVideoMode(int width, int height, int bpp, Uint32 flags)
+{
+    SDL_DisplayMode desktop_mode;
+    int have_desktop_mode;
+    int display = GetVideoDisplay();
+    int window_x = SDL_WINDOWPOS_UNDEFINED_DISPLAY(display);
+    int window_y = SDL_WINDOWPOS_UNDEFINED_DISPLAY(display);
+    int window_w;
+    int window_h;
+    Uint32 window_flags;
+    Uint32 surface_flags;
+
+    if (!initialized_video) {
+        if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_NOPARACHUTE) < 0) {
+            return NULL;
+        }
+        initialized_video = 1;
+    }
+
+    have_desktop_mode =
+        (SDL_GetDesktopDisplayMode(display, &desktop_mode) == 0);
+
+    /* Zero-valued parameters default to the desktop mode */
+    if (width == 0 || height == 0 || bpp == 0) {
+        if (!have_desktop_mode) {
+            /* desktop_mode was not filled in; there is nothing to default to */
+            return NULL;
+        }
+        if (width == 0) {
+            width = desktop_mode.w;
+        }
+        if (height == 0) {
+            height = desktop_mode.h;
+        }
+        if (bpp == 0) {
+            bpp = SDL_BITSPERPIXEL(desktop_mode.format);
+        }
+    }
+
+    /* See if we can simply resize the existing window and surface */
+    if (SDL_ResizeVideoMode(width, height, bpp, flags) == 0) {
+        return SDL_PublicSurface;
+    }
+
+    /* Destroy existing window */
+    SDL_PublicSurface = NULL;
+    if (SDL_ShadowSurface) {
+        /* Clear SDL_DONTFREE (set at creation below) before freeing */
+        SDL_ShadowSurface->flags &= ~SDL_DONTFREE;
+        SDL_FreeSurface(SDL_ShadowSurface);
+        SDL_ShadowSurface = NULL;
+    }
+    if (SDL_VideoSurface) {
+        SDL_VideoSurface->flags &= ~SDL_DONTFREE;
+        SDL_FreeSurface(SDL_VideoSurface);
+        SDL_VideoSurface = NULL;
+    }
+    if (SDL_VideoContext) {
+        /* SDL_GL_MakeCurrent(0, NULL); *//* Doesn't do anything */
+        SDL_GL_DeleteContext(SDL_VideoContext);
+        SDL_VideoContext = NULL;
+    }
+    if (SDL_VideoWindow) {
+        /* Remember where the old window was so the new one can reuse it */
+        SDL_GetWindowPosition(SDL_VideoWindow, &window_x, &window_y);
+        SDL_DestroyWindow(SDL_VideoWindow);
+    }
+
+    /* Set up the event filter */
+    if (!SDL_GetEventFilter(NULL, NULL)) {
+        SDL_SetEventFilter(SDL_CompatEventFilter, NULL);
+    }
+
+    /* Create a new window */
+    window_flags = SDL_WINDOW_SHOWN;
+    if (flags & SDL_FULLSCREEN) {
+        window_flags |= SDL_WINDOW_FULLSCREEN;
+    }
+    if (flags & SDL_OPENGL) {
+        window_flags |= SDL_WINDOW_OPENGL;
+    }
+    if (flags & SDL_RESIZABLE) {
+        window_flags |= SDL_WINDOW_RESIZABLE;
+    }
+    if (flags & SDL_NOFRAME) {
+        window_flags |= SDL_WINDOW_BORDERLESS;
+    }
+    /* Environment settings may override the position chosen above */
+    GetEnvironmentWindowPosition(width, height, &window_x, &window_y);
+    SDL_VideoWindow =
+        SDL_CreateWindow(wm_title, window_x, window_y, width, height,
+                         window_flags);
+    if (!SDL_VideoWindow) {
+        return NULL;
+    }
+    SDL_SetWindowIcon(SDL_VideoWindow, SDL_VideoIcon);
+
+    /* Translate the flags the window actually got back to 1.2-style flags */
+    window_flags = SDL_GetWindowFlags(SDL_VideoWindow);
+    surface_flags = 0;
+    if (window_flags & SDL_WINDOW_FULLSCREEN) {
+        surface_flags |= SDL_FULLSCREEN;
+    }
+    if ((window_flags & SDL_WINDOW_OPENGL) && (flags & SDL_OPENGL)) {
+        surface_flags |= SDL_OPENGL;
+    }
+    if (window_flags & SDL_WINDOW_RESIZABLE) {
+        surface_flags |= SDL_RESIZABLE;
+    }
+    if (window_flags & SDL_WINDOW_BORDERLESS) {
+        surface_flags |= SDL_NOFRAME;
+    }
+
+    /* Remembered so SDL_ResizeVideoMode() can compare later requests */
+    SDL_VideoFlags = flags;
+
+    /* If we're in OpenGL mode, just create a stub surface and we're done! */
+    if (flags & SDL_OPENGL) {
+        SDL_VideoContext = (SDL_GLContext *)SDL_GL_CreateContext(SDL_VideoWindow);
+        if (!SDL_VideoContext) {
+            return NULL;
+        }
+        if (SDL_GL_MakeCurrent(SDL_VideoWindow, SDL_VideoContext) < 0) {
+            return NULL;
+        }
+        SDL_VideoSurface =
+            SDL_CreateRGBSurfaceFrom(NULL, width, height, bpp, 0, 0, 0, 0, 0);
+        if (!SDL_VideoSurface) {
+            return NULL;
+        }
+        SDL_VideoSurface->flags |= surface_flags;
+        SDL_PublicSurface = SDL_VideoSurface;
+        return SDL_PublicSurface;
+    }
+
+    /* Create the screen surface */
+    SDL_WindowSurface = SDL_GetWindowSurface(SDL_VideoWindow);
+    if (!SDL_WindowSurface) {
+        return NULL;
+    }
+
+    /* Center the public surface in the window surface */
+    SDL_GetWindowSize(SDL_VideoWindow, &window_w, &window_h);
+    SDL_VideoViewport.x = (window_w - width)/2;
+    SDL_VideoViewport.y = (window_h - height)/2;
+    SDL_VideoViewport.w = width;
+    SDL_VideoViewport.h = height;
+
+    /*
+     * The video surface is a header-only surface: it borrows the window
+     * surface's format and points into the window surface's pixels at the
+     * viewport offset.
+     */
+    SDL_VideoSurface = SDL_CreateRGBSurfaceFrom(NULL, 0, 0, 32, 0, 0, 0, 0, 0);
+    if (!SDL_VideoSurface) {
+        return NULL;
+    }
+    SDL_VideoSurface->flags |= surface_flags;
+    SDL_VideoSurface->flags |= SDL_DONTFREE;
+    SDL_FreeFormat(SDL_VideoSurface->format);
+    SDL_VideoSurface->format = SDL_WindowSurface->format;
+    SDL_VideoSurface->format->refcount++;
+    SDL_VideoSurface->w = width;
+    SDL_VideoSurface->h = height;
+    SDL_VideoSurface->pitch = SDL_WindowSurface->pitch;
+    SDL_VideoSurface->pixels = (void *)((Uint8 *)SDL_WindowSurface->pixels +
+        SDL_VideoViewport.y * SDL_VideoSurface->pitch +
+        SDL_VideoViewport.x  * SDL_VideoSurface->format->BytesPerPixel);
+    SDL_SetClipRect(SDL_VideoSurface, NULL);
+
+    /* Create a shadow surface if necessary */
+    if ((bpp != SDL_VideoSurface->format->BitsPerPixel)
+        && !(flags & SDL_ANYFORMAT)) {
+        SDL_ShadowSurface =
+            SDL_CreateRGBSurface(0, width, height, bpp, 0, 0, 0, 0);
+        if (!SDL_ShadowSurface) {
+            return NULL;
+        }
+        SDL_ShadowSurface->flags |= surface_flags;
+        SDL_ShadowSurface->flags |= SDL_DONTFREE;
+
+        /* 8-bit SDL_ShadowSurface surfaces report that they have exclusive palette */
+        if (SDL_ShadowSurface->format->palette) {
+            SDL_ShadowSurface->flags |= SDL_HWPALETTE;
+            //TODO SDL_DitherColors(SDL_ShadowSurface->format->palette->colors,
+            //                 SDL_ShadowSurface->format->BitsPerPixel);
+        }
+        SDL_FillRect(SDL_ShadowSurface, NULL,
+            SDL_MapRGB(SDL_ShadowSurface->format, 0, 0, 0));
+    }
+    SDL_PublicSurface =
+        (SDL_ShadowSurface ? SDL_ShadowSurface : SDL_VideoSurface);
+
+    ClearVideoSurface();
+
+    /* We're finally done! */
+    return SDL_PublicSurface;
+}
diff --git a/source/gles2n64/src/ticks.c b/source/gles2n64/src/ticks.c
new file mode 100644 (file)
index 0000000..7819dcb
--- /dev/null
@@ -0,0 +1,35 @@
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ *   Copyright (C) 2011 yongzh (freeman.yong@gmail.com)                    *
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ *   This program is distributed in the hope that it will be useful,       *
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
+ *   GNU General Public License for more details.                          *
+ *                                                                         *
+ *   You should have received a copy of the GNU General Public License     *
+ *   along with this program; if not, write to the                         *
+ *   Free Software Foundation, Inc.,                                       *
+ *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.          *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+#include <time.h>
+
+/* CLOCK_MONOTONIC reading captured by ticksInitialize(); origin for ticksGetTicks(). */
+static struct timespec startTicks;
+
+/* Record the current CLOCK_MONOTONIC time as the origin for ticksGetTicks(). */
+void ticksInitialize(void)
+{
+       clock_gettime(CLOCK_MONOTONIC, &startTicks);
+}
+
+/*
+ * Return the number of whole milliseconds elapsed on CLOCK_MONOTONIC since
+ * ticksInitialize() was called.  The value wraps around modulo the range
+ * of unsigned int.
+ */
+unsigned int ticksGetTicks(void)
+{
+       struct timespec now;
+       long long elapsedNs;
+
+       clock_gettime(CLOCK_MONOTONIC, &now);
+
+       /*
+        * Work in 64-bit nanoseconds and divide once: this avoids signed
+        * overflow of "seconds * 1000" where time_t is 32 bits wide, and
+        * always rounds down even when the tv_nsec difference is negative.
+        */
+       elapsedNs = ((long long)now.tv_sec - (long long)startTicks.tv_sec) * 1000000000LL +
+                       ((long long)now.tv_nsec - (long long)startTicks.tv_nsec);
+       return (unsigned int)(elapsedNs / 1000000LL);
+}
diff --git a/source/gles2n64/src/ticks.h b/source/gles2n64/src/ticks.h
new file mode 100644 (file)
index 0000000..5960d19
--- /dev/null
@@ -0,0 +1,34 @@
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ *   Copyright (C) 2011 yongzh (freeman.yong@gmail.com)                    *
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ *   This program is distributed in the hope that it will be useful,       *
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
+ *   GNU General Public License for more details.                          *
+ *                                                                         *
+ *   You should have received a copy of the GNU General Public License     *
+ *   along with this program; if not, write to the                         *
+ *   Free Software Foundation, Inc.,                                       *
+ *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.          *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+#ifndef EMUTICKS_H
+#define EMUTICKS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Records the current CLOCK_MONOTONIC time as the origin for ticksGetTicks(). */
+void ticksInitialize();
+/* Returns the milliseconds elapsed since ticksInitialize() was called. */
+unsigned int ticksGetTicks();
+
+#ifdef __cplusplus
+}
+#endif
+#endif
+
diff --git a/source/gles2n64/src/video_api_export.ver b/source/gles2n64/src/video_api_export.ver
new file mode 100644 (file)
index 0000000..96bc0fc
--- /dev/null
@@ -0,0 +1,28 @@
+{ global:
+PluginStartup;
+PluginShutdown;
+PluginGetVersion;
+ChangeWindow;
+InitiateGFX;
+MoveScreen;
+ProcessDList;
+ProcessRDPList;
+RomClosed;
+RomOpen;
+RomResumed;
+ShowCFB;
+UpdateScreen;
+ViStatusChanged;
+ViWidthChanged;
+ReadScreen2;
+SetRenderingCallback;
+ResizeVideoOutput;
+SetFrameSkipping;
+SetStretchVideo;
+FBRead;
+FBWrite;
+FBGetFrameBufferInfo;
+StartGL;
+StopGL;
+ResizeGL;
+local: *; };