From 34cf40586ac07c54d9bfc5be30f28743232b6d67 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Tue, 24 Sep 2013 22:01:24 +0200 Subject: [PATCH] GLES2N64 (from mupen64plus-ae) plugin. Compile and run on the OpenPandora --- source/gles2n64/Android.mk | 87 ++ source/gles2n64/Makefile | 176 +++ source/gles2n64/src/2xSAI.cpp | 647 ++++++++ source/gles2n64/src/2xSAI.h | 9 + source/gles2n64/src/3DMath.cpp | 67 + source/gles2n64/src/3DMath.h | 45 + source/gles2n64/src/3DMathNeon.cpp | 133 ++ source/gles2n64/src/COPYING | 172 +++ source/gles2n64/src/CRC.cpp | 93 ++ source/gles2n64/src/CRC.h | 7 + source/gles2n64/src/Common.h | 38 + source/gles2n64/src/Config.cpp | 307 ++++ source/gles2n64/src/Config.h | 79 + source/gles2n64/src/Debug.h | 30 + source/gles2n64/src/DepthBuffer.cpp | 165 ++ source/gles2n64/src/DepthBuffer.h | 28 + source/gles2n64/src/F3D.cpp | 374 +++++ source/gles2n64/src/F3D.h | 99 ++ source/gles2n64/src/F3DCBFD.cpp | 212 +++ source/gles2n64/src/F3DCBFD.h | 7 + source/gles2n64/src/F3DDKR.cpp | 124 ++ source/gles2n64/src/F3DDKR.h | 14 + source/gles2n64/src/F3DEX.cpp | 90 ++ source/gles2n64/src/F3DEX.h | 54 + source/gles2n64/src/F3DEX2.cpp | 255 ++++ source/gles2n64/src/F3DEX2.h | 88 ++ source/gles2n64/src/F3DPD.cpp | 59 + source/gles2n64/src/F3DPD.h | 8 + source/gles2n64/src/F3DWRUS.cpp | 73 + source/gles2n64/src/F3DWRUS.h | 8 + source/gles2n64/src/FrameSkipper.cpp | 61 + source/gles2n64/src/FrameSkipper.h | 55 + source/gles2n64/src/GBI.cpp | 975 ++++++++++++ source/gles2n64/src/GBI.h | 820 ++++++++++ source/gles2n64/src/Hash.h | 42 + source/gles2n64/src/L3D.cpp | 57 + source/gles2n64/src/L3D.h | 10 + source/gles2n64/src/L3DEX.cpp | 61 + source/gles2n64/src/L3DEX.h | 8 + source/gles2n64/src/L3DEX2.cpp | 61 + source/gles2n64/src/L3DEX2.h | 10 + source/gles2n64/src/N64.cpp | 11 + source/gles2n64/src/N64.h | 46 + source/gles2n64/src/OpenGL.cpp | 1361 +++++++++++++++++ source/gles2n64/src/OpenGL.h | 171 +++ source/gles2n64/src/RDP.cpp | 347 +++++ 
source/gles2n64/src/RDP.h | 7 + source/gles2n64/src/RSP.cpp | 150 ++ source/gles2n64/src/RSP.h | 34 + source/gles2n64/src/S2DEX.cpp | 102 ++ source/gles2n64/src/S2DEX.h | 219 +++ source/gles2n64/src/S2DEX2.cpp | 45 + source/gles2n64/src/S2DEX2.h | 20 + source/gles2n64/src/ShaderCombiner.cpp | 844 +++++++++++ source/gles2n64/src/ShaderCombiner.h | 258 ++++ source/gles2n64/src/Textures.cpp | 1334 +++++++++++++++++ source/gles2n64/src/Textures.h | 91 ++ source/gles2n64/src/Types.h | 42 + source/gles2n64/src/VI.cpp | 101 ++ source/gles2n64/src/VI.h | 27 + source/gles2n64/src/convert.h | 338 +++++ source/gles2n64/src/eglport.cpp | 706 +++++++++ source/gles2n64/src/eglport.h | 108 ++ source/gles2n64/src/gDP.cpp | 970 ++++++++++++ source/gles2n64/src/gDP.h | 289 ++++ source/gles2n64/src/gSP.cpp | 1738 ++++++++++++++++++++++ source/gles2n64/src/gSP.h | 264 ++++ source/gles2n64/src/gSPNeon.cpp | 563 +++++++ source/gles2n64/src/gles2N64.cpp | 343 +++++ source/gles2n64/src/gles2N64.h | 25 + source/gles2n64/src/sdl2_compat.h | 783 ++++++++++ source/gles2n64/src/ticks.c | 35 + source/gles2n64/src/ticks.h | 34 + source/gles2n64/src/video_api_export.ver | 28 + 74 files changed, 17142 insertions(+) create mode 100644 source/gles2n64/Android.mk create mode 100755 source/gles2n64/Makefile create mode 100644 source/gles2n64/src/2xSAI.cpp create mode 100644 source/gles2n64/src/2xSAI.h create mode 100644 source/gles2n64/src/3DMath.cpp create mode 100644 source/gles2n64/src/3DMath.h create mode 100644 source/gles2n64/src/3DMathNeon.cpp create mode 100644 source/gles2n64/src/COPYING create mode 100644 source/gles2n64/src/CRC.cpp create mode 100644 source/gles2n64/src/CRC.h create mode 100755 source/gles2n64/src/Common.h create mode 100644 source/gles2n64/src/Config.cpp create mode 100644 source/gles2n64/src/Config.h create mode 100644 source/gles2n64/src/Debug.h create mode 100644 source/gles2n64/src/DepthBuffer.cpp create mode 100644 source/gles2n64/src/DepthBuffer.h create mode 100644 
source/gles2n64/src/F3D.cpp create mode 100644 source/gles2n64/src/F3D.h create mode 100644 source/gles2n64/src/F3DCBFD.cpp create mode 100644 source/gles2n64/src/F3DCBFD.h create mode 100644 source/gles2n64/src/F3DDKR.cpp create mode 100644 source/gles2n64/src/F3DDKR.h create mode 100644 source/gles2n64/src/F3DEX.cpp create mode 100644 source/gles2n64/src/F3DEX.h create mode 100644 source/gles2n64/src/F3DEX2.cpp create mode 100644 source/gles2n64/src/F3DEX2.h create mode 100644 source/gles2n64/src/F3DPD.cpp create mode 100644 source/gles2n64/src/F3DPD.h create mode 100644 source/gles2n64/src/F3DWRUS.cpp create mode 100644 source/gles2n64/src/F3DWRUS.h create mode 100644 source/gles2n64/src/FrameSkipper.cpp create mode 100644 source/gles2n64/src/FrameSkipper.h create mode 100644 source/gles2n64/src/GBI.cpp create mode 100644 source/gles2n64/src/GBI.h create mode 100644 source/gles2n64/src/Hash.h create mode 100644 source/gles2n64/src/L3D.cpp create mode 100644 source/gles2n64/src/L3D.h create mode 100644 source/gles2n64/src/L3DEX.cpp create mode 100644 source/gles2n64/src/L3DEX.h create mode 100644 source/gles2n64/src/L3DEX2.cpp create mode 100644 source/gles2n64/src/L3DEX2.h create mode 100644 source/gles2n64/src/N64.cpp create mode 100644 source/gles2n64/src/N64.h create mode 100755 source/gles2n64/src/OpenGL.cpp create mode 100644 source/gles2n64/src/OpenGL.h create mode 100644 source/gles2n64/src/RDP.cpp create mode 100644 source/gles2n64/src/RDP.h create mode 100644 source/gles2n64/src/RSP.cpp create mode 100644 source/gles2n64/src/RSP.h create mode 100644 source/gles2n64/src/S2DEX.cpp create mode 100644 source/gles2n64/src/S2DEX.h create mode 100644 source/gles2n64/src/S2DEX2.cpp create mode 100644 source/gles2n64/src/S2DEX2.h create mode 100755 source/gles2n64/src/ShaderCombiner.cpp create mode 100644 source/gles2n64/src/ShaderCombiner.h create mode 100644 source/gles2n64/src/Textures.cpp create mode 100644 source/gles2n64/src/Textures.h create mode 100644 
source/gles2n64/src/Types.h create mode 100644 source/gles2n64/src/VI.cpp create mode 100644 source/gles2n64/src/VI.h create mode 100644 source/gles2n64/src/convert.h create mode 100755 source/gles2n64/src/eglport.cpp create mode 100755 source/gles2n64/src/eglport.h create mode 100644 source/gles2n64/src/gDP.cpp create mode 100644 source/gles2n64/src/gDP.h create mode 100644 source/gles2n64/src/gSP.cpp create mode 100644 source/gles2n64/src/gSP.h create mode 100644 source/gles2n64/src/gSPNeon.cpp create mode 100755 source/gles2n64/src/gles2N64.cpp create mode 100644 source/gles2n64/src/gles2N64.h create mode 100644 source/gles2n64/src/sdl2_compat.h create mode 100644 source/gles2n64/src/ticks.c create mode 100644 source/gles2n64/src/ticks.h create mode 100644 source/gles2n64/src/video_api_export.ver diff --git a/source/gles2n64/Android.mk b/source/gles2n64/Android.mk new file mode 100644 index 0000000..f2ae620 --- /dev/null +++ b/source/gles2n64/Android.mk @@ -0,0 +1,87 @@ +LOCAL_PATH := $(call my-dir) + +include $(CLEAR_VARS) +SRCDIR := src + +LOCAL_MODULE := gles2n64 +LOCAL_SHARED_LIBRARIES := ae-imports SDL2 core +LOCAL_STATIC_LIBRARIES := cpufeatures +LOCAL_ARM_MODE := arm + +LOCAL_C_INCLUDES := \ + $(M64P_API_INCLUDES) \ + $(SDL_INCLUDES) \ + $(AE_BRIDGE_INCLUDES) \ + +LOCAL_SRC_FILES := \ + $(SRCDIR)/2xSAI.cpp \ + $(SRCDIR)/3DMath.cpp \ + $(SRCDIR)/Config.cpp \ + $(SRCDIR)/CRC.cpp \ + $(SRCDIR)/DepthBuffer.cpp \ + $(SRCDIR)/F3D.cpp \ + $(SRCDIR)/F3DCBFD.cpp \ + $(SRCDIR)/F3DDKR.cpp \ + $(SRCDIR)/F3DEX.cpp \ + $(SRCDIR)/F3DEX2.cpp \ + $(SRCDIR)/F3DPD.cpp \ + $(SRCDIR)/F3DWRUS.cpp \ + $(SRCDIR)/FrameSkipper.cpp \ + $(SRCDIR)/GBI.cpp \ + $(SRCDIR)/gDP.cpp \ + $(SRCDIR)/gles2N64.cpp \ + $(SRCDIR)/gSP.cpp \ + $(SRCDIR)/L3D.cpp \ + $(SRCDIR)/L3DEX.cpp \ + $(SRCDIR)/L3DEX2.cpp \ + $(SRCDIR)/N64.cpp \ + $(SRCDIR)/OpenGL.cpp \ + $(SRCDIR)/RDP.cpp \ + $(SRCDIR)/RSP.cpp \ + $(SRCDIR)/S2DEX.cpp \ + $(SRCDIR)/S2DEX2.cpp \ + $(SRCDIR)/ShaderCombiner.cpp \ + 
$(SRCDIR)/Textures.cpp \ + $(SRCDIR)/ticks.c \ + $(SRCDIR)/VI.cpp \ + +LOCAL_CFLAGS := \ + $(COMMON_CFLAGS) \ + -D__CRC_OPT \ + -D__HASHMAP_OPT \ + -D__TRIBUFFER_OPT \ + -D__VEC4_OPT \ + -DANDROID \ + -DUSE_SDL \ + -fsigned-char \ + #-DSDL_NO_COMPAT \ + +LOCAL_CPPFLAGS := $(COMMON_CPPFLAGS) + +LOCAL_LDFLAGS := -Wl,-version-script,$(LOCAL_PATH)/$(SRCDIR)/video_api_export.ver + +LOCAL_LDLIBS := \ + -lGLESv2 \ + -llog \ + +ifeq ($(TARGET_ARCH_ABI), armeabi-v7a) + # Use for ARM7a: + LOCAL_SRC_FILES += $(SRCDIR)/gSPNeon.cpp.neon + LOCAL_SRC_FILES += $(SRCDIR)/3DMathNeon.cpp.neon + LOCAL_CFLAGS += -DARM_ASM + LOCAL_CFLAGS += -D__NEON_OPT + +else ifeq ($(TARGET_ARCH_ABI), armeabi) + # Use for pre-ARM7a: + +else ifeq ($(TARGET_ARCH_ABI), x86) + # TODO: set the proper flags here + +else + # Any other architectures that Android could be running on? + +endif + +include $(BUILD_SHARED_LIBRARY) + +$(call import-module, android/cpufeatures) diff --git a/source/gles2n64/Makefile b/source/gles2n64/Makefile new file mode 100755 index 0000000..3048cfa --- /dev/null +++ b/source/gles2n64/Makefile @@ -0,0 +1,176 @@ +PIC ?= 1 +NO_ASM := 1 +CFLAGS += -mcpu=cortex-a8 -mfpu=neon -mfloat-abi=softfp -mtune=cortex-a8 -fsigned-char -DNO_ASM -DPAULSCODE -Ofast -ffast-math -fno-strict-aliasing +CFLAGS += -I ../mupen64plus-core/src/api/ +CFLAGS += `sdl-config --cflags` + +SHARED = -shared +SRCDIR := src + +MODULE := gles2n64 +SHARED_LIBRARIES := SDL core +ARM_MODE := arm + +C_INCLUDES := \ + $(M64P_API_INCLUDES) \ + $(SDL_INCLUDES) \ + +SOURCE := \ + $(SRCDIR)/2xSAI.cpp \ + $(SRCDIR)/3DMath.cpp \ + $(SRCDIR)/Config.cpp \ + $(SRCDIR)/CRC.cpp \ + $(SRCDIR)/DepthBuffer.cpp \ + $(SRCDIR)/F3D.cpp \ + $(SRCDIR)/F3DCBFD.cpp \ + $(SRCDIR)/F3DDKR.cpp \ + $(SRCDIR)/F3DEX.cpp \ + $(SRCDIR)/F3DEX2.cpp \ + $(SRCDIR)/F3DPD.cpp \ + $(SRCDIR)/F3DWRUS.cpp \ + $(SRCDIR)/FrameSkipper.cpp \ + $(SRCDIR)/GBI.cpp \ + $(SRCDIR)/gDP.cpp \ + $(SRCDIR)/gles2N64.cpp \ + $(SRCDIR)/gSP.cpp \ + $(SRCDIR)/L3D.cpp \ + 
$(SRCDIR)/L3DEX.cpp \
+	$(SRCDIR)/L3DEX2.cpp \
+	$(SRCDIR)/N64.cpp \
+	$(SRCDIR)/OpenGL.cpp \
+	$(SRCDIR)/RDP.cpp \
+	$(SRCDIR)/RSP.cpp \
+	$(SRCDIR)/S2DEX.cpp \
+	$(SRCDIR)/S2DEX2.cpp \
+	$(SRCDIR)/ShaderCombiner.cpp \
+	$(SRCDIR)/Textures.cpp \
+	$(SRCDIR)/ticks.c \
+	$(SRCDIR)/VI.cpp \
+	$(SRCDIR)/eglport.cpp \
+
+CFLAGS += \
+	$(COMMON_CFLAGS) \
+	-D__CRC_OPT \
+	-D__HASHMAP_OPT \
+	-D__TRIBUFFER_OPT \
+	-D__VEC4_OPT \
+	-DUSE_SDL \
+	-fsigned-char \
+	#-DSDL_NO_COMPAT \
+
+ifeq ($(PIC), 1)
+  CFLAGS += -fPIC
+else
+  CFLAGS += -fno-PIC
+endif
+
+CPPFLAGS := $(CPPFLAGS)
+
+LDLIBS := \
+	-lGLESv2 \
+	-lEGL \
+	-lrt \
+
+# NEON sources and defines are always enabled in this Makefile (ARMv7-A build):
+SOURCE += $(SRCDIR)/gSPNeon.cpp
+SOURCE += $(SRCDIR)/3DMathNeon.cpp
+CFLAGS += -DARM_ASM
+CFLAGS += -D__NEON_OPT
+
+LDFLAGS += $(SHARED)
+
+include $(BUILD_SHARED_LIBRARY)
+
+# set base program pointers and flags
+CC = $(CROSS_COMPILE)gcc
+CXX = $(CROSS_COMPILE)g++
+RM ?= rm -f
+INSTALL ?= install
+MKDIR ?= mkdir -p
+COMPILE.c = $(Q_CC)$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -gdwarf-2 -c
+COMPILE.cc = $(Q_CXX)$(CXX) $(CXXFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -gdwarf-2 -c
+LINK.o = $(Q_LD)$(CXX) $(CXXFLAGS) $(LDFLAGS) $(TARGET_ARCH) -gdwarf-2
+
+# set installation options
+ifeq ($(PREFIX),)
+  PREFIX := /usr/local
+endif
+ifeq ($(SHAREDIR),)
+  SHAREDIR := $(PREFIX)/share/mupen64plus
+endif
+ifeq ($(LIBDIR),)
+  LIBDIR := $(PREFIX)/lib
+endif
+ifeq ($(PLUGINDIR),)
+  PLUGINDIR := $(LIBDIR)/mupen64plus
+endif
+
+OBJDIR = _obj$(POSTFIX)
+
+# generate the list of object files to build and create their output directories
+OBJECTS := $(patsubst $(SRCDIR)/%.c, $(OBJDIR)/%.o, $(filter %.c, $(SOURCE)))
+OBJECTS += $(patsubst $(SRCDIR)/%.cpp, $(OBJDIR)/%.o, $(filter %.cpp, $(SOURCE)))
+OBJDIRS = $(dir $(OBJECTS))
+$(shell $(MKDIR) $(OBJDIRS))
+
+# build targets
+TARGET = mupen64plus-video-gles2n64.so
+
+targets:
+	@echo "Mupen64plus-video-gles2n64 N64 Graphics plugin makefile. "
+	@echo " Targets:"
+	@echo " all == Build Mupen64plus-video-gles2n64 plugin"
+	@echo " clean == remove object files"
+	@echo " rebuild == clean and re-build all"
+	@echo " install == Install Mupen64Plus-video-gles2n64 plugin"
+	@echo " uninstall == Uninstall Mupen64Plus-video-gles2n64 plugin"
+	@echo " Options:"
+	@echo " BITS=32 == build 32-bit binaries on 64-bit machine"
+	@echo " NO_ASM=1 == build without inline assembly code (x86 MMX/SSE)"
+	@echo " APIDIR=path == path to find Mupen64Plus Core headers"
+	@echo " OPTFLAGS=flag == compiler optimization (default: -O3)"
+	@echo " WARNFLAGS=flag == compiler warning levels (default: -Wall)"
+	@echo " PIC=(1|0) == Force enable/disable of position independent code"
+	@echo " POSTFIX=name == String added to the name of the build (default: '')"
+	@echo " Install Options:"
+	@echo " PREFIX=path == install/uninstall prefix (default: /usr/local)"
+	@echo " SHAREDIR=path == path to install shared data files (default: PREFIX/share/mupen64plus)"
+	@echo " LIBDIR=path == library prefix (default: PREFIX/lib)"
+	@echo " PLUGINDIR=path == path to install plugin libraries (default: LIBDIR/mupen64plus)"
+	@echo " DESTDIR=path == path to prepend to all installation paths (only for packagers)"
+	@echo " Debugging Options:"
+	@echo " DEBUG=1 == add debugging symbols"
+	@echo " V=1 == show verbose compiler output"
+
+all: $(TARGET)
+
+install: $(TARGET)
+	$(INSTALL) -d "$(DESTDIR)$(PLUGINDIR)"
+	$(INSTALL) -m 0644 $(INSTALL_STRIP_FLAG) $(TARGET) "$(DESTDIR)$(PLUGINDIR)"
+	$(INSTALL) -d "$(DESTDIR)$(SHAREDIR)"
+
+uninstall:
+	$(RM) "$(DESTDIR)$(PLUGINDIR)/$(TARGET)"
+
+clean:
+	$(RM) -r $(OBJDIR) $(TARGET)
+
+rebuild: clean all
+
+# build dependency files
+CFLAGS += -MD
+-include $(OBJECTS:.o=.d)
+
+CXXFLAGS += $(CFLAGS)
+
+# standard build rules
+$(OBJDIR)/%.o: $(SRCDIR)/%.c
+	$(COMPILE.c) -o $@ $<
+
+$(OBJDIR)/%.o: $(SRCDIR)/%.cpp
+	$(COMPILE.cc) -o $@ $<
+
+$(TARGET): $(OBJECTS)
+	$(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -o $@
+
+.PHONY: all 
clean install uninstall targets diff --git a/source/gles2n64/src/2xSAI.cpp b/source/gles2n64/src/2xSAI.cpp new file mode 100644 index 0000000..516f773 --- /dev/null +++ b/source/gles2n64/src/2xSAI.cpp @@ -0,0 +1,647 @@ +#include "Types.h" +//#include "GBI.h" + +static inline s16 GetResult1( u32 A, u32 B, u32 C, u32 D, u32 E ) +{ + s16 x = 0; + s16 y = 0; + s16 r = 0; + + if (A == C) x += 1; else if (B == C) y += 1; + if (A == D) x += 1; else if (B == D) y += 1; + if (x <= 1) r += 1; + if (y <= 1) r -= 1; + + return r; +} + +static inline s16 GetResult2( u32 A, u32 B, u32 C, u32 D, u32 E) +{ + s16 x = 0; + s16 y = 0; + s16 r = 0; + + if (A == C) x += 1; else if (B == C) y += 1; + if (A == D) x += 1; else if (B == D) y += 1; + if (x <= 1) r -= 1; + if (y <= 1) r += 1; + + return r; +} + + +static inline s16 GetResult( u32 A, u32 B, u32 C, u32 D ) +{ + s16 x = 0; + s16 y = 0; + s16 r = 0; + + if (A == C) x += 1; else if (B == C) y += 1; + if (A == D) x += 1; else if (B == D) y += 1; + if (x <= 1) r += 1; + if (y <= 1) r -= 1; + + return r; +} + +static inline u16 INTERPOLATE4444( u16 A, u16 B) +{ + if (A != B) + return ((A & 0xEEEE) >> 1) + + (((B & 0xEEEE) >> 1) | + (A & B & 0x1111)); + else + return A; +} + +static inline u16 INTERPOLATE5551( u16 A, u16 B) +{ + if (A != B) + return ((A & 0xF7BC) >> 1) + + (((B & 0xF7BC) >> 1) | + (A & B & 0x0843)); + else + return A; +} + +static inline u32 INTERPOLATE8888( u32 A, u32 B) +{ + if (A != B) + return ((A & 0xFEFEFEFE) >> 1) + + (((B & 0xFEFEFEFE) >> 1) | + (A & B & 0x01010101)); + else + return A; +} + +static inline u16 Q_INTERPOLATE4444( u16 A, u16 B, u16 C, u16 D) +{ + u16 x = ((A & 0xCCCC) >> 2) + + ((B & 0xCCCC) >> 2) + + ((C & 0xCCCC) >> 2) + + ((D & 0xCCCC) >> 2); + u16 y = (((A & 0x3333) + + (B & 0x3333) + + (C & 0x3333) + + (D & 0x3333)) >> 2) & 0x3333; + return x | y; +} + +static inline u16 Q_INTERPOLATE5551( u16 A, u16 B, u16 C, u16 D) +{ + u16 x = ((A & 0xE738) >> 2) + + ((B & 0xE738) >> 2) + + ((C & 
0xE738) >> 2) + + ((D & 0xE738) >> 2); + u16 y = (((A & 0x18C6) + + (B & 0x18C6) + + (C & 0x18C6) + + (D & 0x18C6)) >> 2) & 0x18C6; + u16 z = ((A & 0x0001) + + (B & 0x0001) + + (C & 0x0001) + + (D & 0x0001)) > 2 ? 1 : 0; + return x | y | z; +} + +static inline u32 Q_INTERPOLATE8888( u32 A, u32 B, u32 C, u32 D) +{ + u32 x = ((A & 0xFCFCFCFC) >> 2) + + ((B & 0xFCFCFCFC) >> 2) + + ((C & 0xFCFCFCFC) >> 2) + + ((D & 0xFCFCFCFC) >> 2); + u32 y = (((A & 0x03030303) + + (B & 0x03030303) + + (C & 0x03030303) + + (D & 0x03030303)) >> 2) & 0x03030303; + return x | y; +} + +void _2xSaI4444( u16 *srcPtr, u16 *destPtr, u16 width, u16 height, s32 clampS, s32 clampT ) +{ + u16 destWidth = width << 1; + //u16 destHeight = height << 1; + + u32 colorA, colorB, colorC, colorD, + colorE, colorF, colorG, colorH, + colorI, colorJ, colorK, colorL, + colorM, colorN, colorO, colorP; + u32 product, product1, product2; + + s16 row0, row1, row2, row3; + s16 col0, col1, col2, col3; + + for (u16 y = 0; y < height; y++) + { + if (y > 0) + row0 = -width; + else + row0 = clampT ? 0 : (height - 1) * width; + + row1 = 0; + + if (y < height - 1) + { + row2 = width; + + if (y < height - 2) + row3 = width << 1; + else + row3 = clampT ? width : -y * width; + } + else + { + row2 = clampT ? 0 : -y * width; + row3 = clampT ? 0 : (1 - y) * width; + } + + for (u16 x = 0; x < width; x++) + { + if (x > 0) + col0 = -1; + else + col0 = clampS ? 0 : width - 1; + + col1 = 0; + + if (x < width - 1) + { + col2 = 1; + + if (x < width - 2) + col3 = 2; + else + col3 = clampS ? 1 : -x; + } + else + { + col2 = clampS ? 0 : -x; + col3 = clampS ? 
0 : 1 - x; + } + +//--------------------------------------- +// Map of the pixels: I|E F|J +// G|A B|K +// H|C D|L +// M|N O|P + colorI = *(srcPtr + col0 + row0); + colorE = *(srcPtr + col1 + row0); + colorF = *(srcPtr + col2 + row0); + colorJ = *(srcPtr + col3 + row0); + + colorG = *(srcPtr + col0 + row1); + colorA = *(srcPtr + col1 + row1); + colorB = *(srcPtr + col2 + row1); + colorK = *(srcPtr + col3 + row1); + + colorH = *(srcPtr + col0 + row2); + colorC = *(srcPtr + col1 + row2); + colorD = *(srcPtr + col2 + row2); + colorL = *(srcPtr + col3 + row2); + + colorM = *(srcPtr + col0 + row3); + colorN = *(srcPtr + col1 + row3); + colorO = *(srcPtr + col2 + row3); + colorP = *(srcPtr + col3 + row3); + + if ((colorA == colorD) && (colorB != colorC)) + { + if ( ((colorA == colorE) && (colorB == colorL)) || + ((colorA == colorC) && (colorA == colorF) && (colorB != colorE) && (colorB == colorJ)) ) + product = colorA; + else + product = INTERPOLATE4444(colorA, colorB); + + if (((colorA == colorG) && (colorC == colorO)) || + ((colorA == colorB) && (colorA == colorH) && (colorG != colorC) && (colorC == colorM)) ) + product1 = colorA; + else + product1 = INTERPOLATE4444(colorA, colorC); + + product2 = colorA; + } + else if ((colorB == colorC) && (colorA != colorD)) + { + if (((colorB == colorF) && (colorA == colorH)) || + ((colorB == colorE) && (colorB == colorD) && (colorA != colorF) && (colorA == colorI)) ) + product = colorB; + else + product = INTERPOLATE4444(colorA, colorB); + + if (((colorC == colorH) && (colorA == colorF)) || + ((colorC == colorG) && (colorC == colorD) && (colorA != colorH) && (colorA == colorI)) ) + product1 = colorC; + else + product1 = INTERPOLATE4444(colorA, colorC); + product2 = colorB; + } + else if ((colorA == colorD) && (colorB == colorC)) + { + if (colorA == colorB) + { + product = colorA; + product1 = colorA; + product2 = colorA; + } + else + { + s16 r = 0; + product1 = INTERPOLATE4444(colorA, colorC); + product = INTERPOLATE4444(colorA, 
colorB); + + r += GetResult1 (colorA, colorB, colorG, colorE, colorI); + r += GetResult2 (colorB, colorA, colorK, colorF, colorJ); + r += GetResult2 (colorB, colorA, colorH, colorN, colorM); + r += GetResult1 (colorA, colorB, colorL, colorO, colorP); + + if (r > 0) + product2 = colorA; + else if (r < 0) + product2 = colorB; + else + product2 = Q_INTERPOLATE4444(colorA, colorB, colorC, colorD); + } + } + else + { + product2 = Q_INTERPOLATE4444(colorA, colorB, colorC, colorD); + + if ((colorA == colorC) && (colorA == colorF) && (colorB != colorE) && (colorB == colorJ)) + product = colorA; + else if ((colorB == colorE) && (colorB == colorD) && (colorA != colorF) && (colorA == colorI)) + product = colorB; + else + product = INTERPOLATE4444(colorA, colorB); + + if ((colorA == colorB) && (colorA == colorH) && (colorG != colorC) && (colorC == colorM)) + product1 = colorA; + else if ((colorC == colorG) && (colorC == colorD) && (colorA != colorH) && (colorA == colorI)) + product1 = colorC; + else + product1 = INTERPOLATE4444(colorA, colorC); + } + + destPtr[0] = colorA; + destPtr[1] = product; + destPtr[destWidth] = product1; + destPtr[destWidth + 1] = product2; + + srcPtr++; + destPtr += 2; + } + destPtr += destWidth; + } +} + +void _2xSaI5551( u16 *srcPtr, u16 *destPtr, u16 width, u16 height, s32 clampS, s32 clampT ) +{ + u16 destWidth = width << 1; + //u16 destHeight = height << 1; + + u32 colorA, colorB, colorC, colorD, + colorE, colorF, colorG, colorH, + colorI, colorJ, colorK, colorL, + colorM, colorN, colorO, colorP; + u32 product, product1, product2; + + s16 row0, row1, row2, row3; + s16 col0, col1, col2, col3; + + for (u16 y = 0; y < height; y++) + { + if (y > 0) + row0 = -width; + else + row0 = clampT ? 0 : (height - 1) * width; + + row1 = 0; + + if (y < height - 1) + { + row2 = width; + + if (y < height - 2) + row3 = width << 1; + else + row3 = clampT ? width : -y * width; + } + else + { + row2 = clampT ? 0 : -y * width; + row3 = clampT ? 
0 : (1 - y) * width; + } + + for (u16 x = 0; x < width; x++) + { + if (x > 0) + col0 = -1; + else + col0 = clampS ? 0 : width - 1; + + col1 = 0; + + if (x < width - 1) + { + col2 = 1; + + if (x < width - 2) + col3 = 2; + else + col3 = clampS ? 1 : -x; + } + else + { + col2 = clampS ? 0 : -x; + col3 = clampS ? 0 : 1 - x; + } + +//--------------------------------------- +// Map of the pixels: I|E F|J +// G|A B|K +// H|C D|L +// M|N O|P + colorI = *(srcPtr + col0 + row0); + colorE = *(srcPtr + col1 + row0); + colorF = *(srcPtr + col2 + row0); + colorJ = *(srcPtr + col3 + row0); + + colorG = *(srcPtr + col0 + row1); + colorA = *(srcPtr + col1 + row1); + colorB = *(srcPtr + col2 + row1); + colorK = *(srcPtr + col3 + row1); + + colorH = *(srcPtr + col0 + row2); + colorC = *(srcPtr + col1 + row2); + colorD = *(srcPtr + col2 + row2); + colorL = *(srcPtr + col3 + row2); + + colorM = *(srcPtr + col0 + row3); + colorN = *(srcPtr + col1 + row3); + colorO = *(srcPtr + col2 + row3); + colorP = *(srcPtr + col3 + row3); + + if ((colorA == colorD) && (colorB != colorC)) + { + if ( ((colorA == colorE) && (colorB == colorL)) || + ((colorA == colorC) && (colorA == colorF) && (colorB != colorE) && (colorB == colorJ)) ) + product = colorA; + else + product = INTERPOLATE5551(colorA, colorB); + + if (((colorA == colorG) && (colorC == colorO)) || + ((colorA == colorB) && (colorA == colorH) && (colorG != colorC) && (colorC == colorM)) ) + product1 = colorA; + else + product1 = INTERPOLATE5551(colorA, colorC); + + product2 = colorA; + } + else if ((colorB == colorC) && (colorA != colorD)) + { + if (((colorB == colorF) && (colorA == colorH)) || + ((colorB == colorE) && (colorB == colorD) && (colorA != colorF) && (colorA == colorI)) ) + product = colorB; + else + product = INTERPOLATE5551(colorA, colorB); + + if (((colorC == colorH) && (colorA == colorF)) || + ((colorC == colorG) && (colorC == colorD) && (colorA != colorH) && (colorA == colorI)) ) + product1 = colorC; + else + product1 = 
INTERPOLATE5551(colorA, colorC); + product2 = colorB; + } + else if ((colorA == colorD) && (colorB == colorC)) + { + if (colorA == colorB) + { + product = colorA; + product1 = colorA; + product2 = colorA; + } + else + { + s16 r = 0; + product1 = INTERPOLATE5551(colorA, colorC); + product = INTERPOLATE5551(colorA, colorB); + + r += GetResult1 (colorA, colorB, colorG, colorE, colorI); + r += GetResult2 (colorB, colorA, colorK, colorF, colorJ); + r += GetResult2 (colorB, colorA, colorH, colorN, colorM); + r += GetResult1 (colorA, colorB, colorL, colorO, colorP); + + if (r > 0) + product2 = colorA; + else if (r < 0) + product2 = colorB; + else + product2 = Q_INTERPOLATE5551(colorA, colorB, colorC, colorD); + } + } + else + { + product2 = Q_INTERPOLATE5551(colorA, colorB, colorC, colorD); + + if ((colorA == colorC) && (colorA == colorF) && (colorB != colorE) && (colorB == colorJ)) + product = colorA; + else if ((colorB == colorE) && (colorB == colorD) && (colorA != colorF) && (colorA == colorI)) + product = colorB; + else + product = INTERPOLATE5551(colorA, colorB); + + if ((colorA == colorB) && (colorA == colorH) && (colorG != colorC) && (colorC == colorM)) + product1 = colorA; + else if ((colorC == colorG) && (colorC == colorD) && (colorA != colorH) && (colorA == colorI)) + product1 = colorC; + else + product1 = INTERPOLATE5551(colorA, colorC); + } + + destPtr[0] = colorA; + destPtr[1] = product; + destPtr[destWidth] = product1; + destPtr[destWidth + 1] = product2; + + srcPtr++; + destPtr += 2; + } + destPtr += destWidth; + } +} + +void _2xSaI8888( u32 *srcPtr, u32 *destPtr, u16 width, u16 height, s32 clampS, s32 clampT ) +{ + u16 destWidth = width << 1; + //u16 destHeight = height << 1; + + u32 colorA, colorB, colorC, colorD, + colorE, colorF, colorG, colorH, + colorI, colorJ, colorK, colorL, + colorM, colorN, colorO, colorP; + u32 product, product1, product2; + + s16 row0, row1, row2, row3; + s16 col0, col1, col2, col3; + + for (u16 y = 0; y < height; y++) + { + if 
(y > 0) + row0 = -width; + else + row0 = clampT ? 0 : (height - 1) * width; + + row1 = 0; + + if (y < height - 1) + { + row2 = width; + + if (y < height - 2) + row3 = width << 1; + else + row3 = clampT ? width : -y * width; + } + else + { + row2 = clampT ? 0 : -y * width; + row3 = clampT ? 0 : (1 - y) * width; + } + + for (u16 x = 0; x < width; x++) + { + if (x > 0) + col0 = -1; + else + col0 = clampS ? 0 : width - 1; + + col1 = 0; + + if (x < width - 1) + { + col2 = 1; + + if (x < width - 2) + col3 = 2; + else + col3 = clampS ? 1 : -x; + } + else + { + col2 = clampS ? 0 : -x; + col3 = clampS ? 0 : 1 - x; + } + +//--------------------------------------- +// Map of the pixels: I|E F|J +// G|A B|K +// H|C D|L +// M|N O|P + colorI = *(srcPtr + col0 + row0); + colorE = *(srcPtr + col1 + row0); + colorF = *(srcPtr + col2 + row0); + colorJ = *(srcPtr + col3 + row0); + + colorG = *(srcPtr + col0 + row1); + colorA = *(srcPtr + col1 + row1); + colorB = *(srcPtr + col2 + row1); + colorK = *(srcPtr + col3 + row1); + + colorH = *(srcPtr + col0 + row2); + colorC = *(srcPtr + col1 + row2); + colorD = *(srcPtr + col2 + row2); + colorL = *(srcPtr + col3 + row2); + + colorM = *(srcPtr + col0 + row3); + colorN = *(srcPtr + col1 + row3); + colorO = *(srcPtr + col2 + row3); + colorP = *(srcPtr + col3 + row3); + + if ((colorA == colorD) && (colorB != colorC)) + { + if ( ((colorA == colorE) && (colorB == colorL)) || + ((colorA == colorC) && (colorA == colorF) && (colorB != colorE) && (colorB == colorJ)) ) + product = colorA; + else + product = INTERPOLATE8888(colorA, colorB); + + if (((colorA == colorG) && (colorC == colorO)) || + ((colorA == colorB) && (colorA == colorH) && (colorG != colorC) && (colorC == colorM)) ) + product1 = colorA; + else + product1 = INTERPOLATE8888(colorA, colorC); + + product2 = colorA; + } + else if ((colorB == colorC) && (colorA != colorD)) + { + if (((colorB == colorF) && (colorA == colorH)) || + ((colorB == colorE) && (colorB == colorD) && (colorA != 
colorF) && (colorA == colorI)) ) + product = colorB; + else + product = INTERPOLATE8888(colorA, colorB); + + if (((colorC == colorH) && (colorA == colorF)) || + ((colorC == colorG) && (colorC == colorD) && (colorA != colorH) && (colorA == colorI)) ) + product1 = colorC; + else + product1 = INTERPOLATE8888(colorA, colorC); + product2 = colorB; + } + else if ((colorA == colorD) && (colorB == colorC)) + { + if (colorA == colorB) + { + product = colorA; + product1 = colorA; + product2 = colorA; + } + else + { + s16 r = 0; + product1 = INTERPOLATE8888(colorA, colorC); + product = INTERPOLATE8888(colorA, colorB); + + r += GetResult1 (colorA, colorB, colorG, colorE, colorI); + r += GetResult2 (colorB, colorA, colorK, colorF, colorJ); + r += GetResult2 (colorB, colorA, colorH, colorN, colorM); + r += GetResult1 (colorA, colorB, colorL, colorO, colorP); + + if (r > 0) + product2 = colorA; + else if (r < 0) + product2 = colorB; + else + product2 = Q_INTERPOLATE8888(colorA, colorB, colorC, colorD); + } + } + else + { + product2 = Q_INTERPOLATE8888(colorA, colorB, colorC, colorD); + + if ((colorA == colorC) && (colorA == colorF) && (colorB != colorE) && (colorB == colorJ)) + product = colorA; + else if ((colorB == colorE) && (colorB == colorD) && (colorA != colorF) && (colorA == colorI)) + product = colorB; + else + product = INTERPOLATE8888(colorA, colorB); + + if ((colorA == colorB) && (colorA == colorH) && (colorG != colorC) && (colorC == colorM)) + product1 = colorA; + else if ((colorC == colorG) && (colorC == colorD) && (colorA != colorH) && (colorA == colorI)) + product1 = colorC; + else + product1 = INTERPOLATE8888(colorA, colorC); + } + + destPtr[0] = colorA; + destPtr[1] = product; + destPtr[destWidth] = product1; + destPtr[destWidth + 1] = product2; + + srcPtr++; + destPtr += 2; + } + destPtr += destWidth; + } +} + diff --git a/source/gles2n64/src/2xSAI.h b/source/gles2n64/src/2xSAI.h new file mode 100644 index 0000000..1b47cc9 --- /dev/null +++ 
b/source/gles2n64/src/2xSAI.h
@@ -0,0 +1,9 @@
+#ifndef _2XSAI_H
+#define _2XSAI_H
+#include "Types.h"
+
+void _2xSaI8888( u32 *srcPtr, u32 *destPtr, u16 width, u16 height, s32 clampS, s32 clampT );
+void _2xSaI4444( u16 *srcPtr, u16 *destPtr, u16 width, u16 height, s32 clampS, s32 clampT );
+void _2xSaI5551( u16 *srcPtr, u16 *destPtr, u16 width, u16 height, s32 clampS, s32 clampT );
+#endif
+
diff --git a/source/gles2n64/src/3DMath.cpp b/source/gles2n64/src/3DMath.cpp
new file mode 100644
index 0000000..bb684c7
--- /dev/null
+++ b/source/gles2n64/src/3DMath.cpp
@@ -0,0 +1,67 @@
+#include <math.h>
+// dest[r][c] = sum over k of m1[r][k] * m0[k][c]; dest must not alias m0 or m1.
+static void MultMatrix_default( float m0[4][4], float m1[4][4],
+        float dest[4][4])
+{
+    int i;
+    for (i = 0; i < 4; i++)
+    {
+        dest[0][i] = m0[0][i]*m1[0][0] + m0[1][i]*m1[0][1] + m0[2][i]*m1[0][2] + m0[3][i]*m1[0][3];
+        dest[1][i] = m0[0][i]*m1[1][0] + m0[1][i]*m1[1][1] + m0[2][i]*m1[1][2] + m0[3][i]*m1[1][3];
+        dest[2][i] = m0[0][i]*m1[2][0] + m0[1][i]*m1[2][1] + m0[2][i]*m1[2][2] + m0[3][i]*m1[2][3];
+        dest[3][i] = m0[3][i]*m1[3][3] + m0[2][i]*m1[3][2] + m0[1][i]*m1[3][1] + m0[0][i]*m1[3][0];
+    }
+}
+// vec[i] = sum over k of mtx[k][i] * vec[k] (3x3 part only), then normalized in place.
+static void TransformVectorNormalize_default(float vec[3], float mtx[4][4])
+{
+    float len;
+    const float x = vec[0], y = vec[1], z = vec[2]; // snapshot: vec is overwritten below
+    vec[0] = mtx[0][0] * x
+           + mtx[1][0] * y
+           + mtx[2][0] * z;
+    vec[1] = mtx[0][1] * x
+           + mtx[1][1] * y
+           + mtx[2][1] * z;
+    vec[2] = mtx[0][2] * x
+           + mtx[1][2] * y
+           + mtx[2][2] * z;
+    len = vec[0]*vec[0] + vec[1]*vec[1] + vec[2]*vec[2];
+    if (len != 0.0) // a zero-length result is left as-is instead of dividing by zero
+    {
+        len = sqrtf(len);
+        vec[0] /= len;
+        vec[1] /= len;
+        vec[2] /= len;
+    }
+}
+// Scales v to unit length in place; a zero-length v is left untouched.
+static void Normalize_default(float v[3])
+{
+    float len;
+
+    len = v[0]*v[0] + v[1]*v[1] + v[2]*v[2];
+    if (len != 0.0)
+    {
+        len = sqrtf(len);
+        v[0] /= len;
+        v[1] /= len;
+        v[2] /= len;
+    }
+}
+// Returns the 3-component dot product of v0 and v1.
+static float DotProduct_default( float v0[3], float v1[3] )
+{
+    float dot;
+    dot = v0[0]*v1[0] + v0[1]*v1[1] + v0[2]*v1[2];
+    return dot;
+}
+
+// Entry points used by callers; they default to the portable C versions above.
+void 
(*MultMatrix)(float m0[4][4], float m1[4][4], float dest[4][4]) =
+        MultMatrix_default;
+void (*TransformVectorNormalize)(float vec[3], float mtx[4][4]) =
+        TransformVectorNormalize_default;
+void (*Normalize)(float v[3]) = Normalize_default;
+float (*DotProduct)(float v0[3], float v1[3]) = DotProduct_default;
+
diff --git a/source/gles2n64/src/3DMath.h b/source/gles2n64/src/3DMath.h
new file mode 100644
index 0000000..d639a97
--- /dev/null
+++ b/source/gles2n64/src/3DMath.h
@@ -0,0 +1,45 @@
+#ifndef _3DMATH_H
+#define _3DMATH_H
+
+#include <string.h>
+// Math entry points, dispatched through function pointers defined in 3DMath.cpp.
+extern void (*MultMatrix)(float m0[4][4], float m1[4][4], float dest[4][4]);
+extern void (*TransformVectorNormalize)(float vec[3], float mtx[4][4]);
+extern void (*Normalize)(float v[3]);
+extern float (*DotProduct)(float v0[3], float v1[3]);
+// Copies the 4x4 matrix m1 into m0.
+inline void CopyMatrix( float m0[4][4], float m1[4][4] )
+{
+    memcpy( m0, m1, 16 * sizeof( float ) );
+}
+// In-place multiply: m0 receives the result of MultMatrix(m0, m1).
+inline void MultMatrix2( float m0[4][4], float m1[4][4] )
+{
+    float dst[4][4];
+    MultMatrix(m0, m1, dst);
+    memcpy( m0, dst, sizeof(float) * 16 );
+}
+// Transposes the upper-left 3x3 block of mtx in place.
+inline void Transpose3x3Matrix( float mtx[4][4] )
+{
+    float tmp;
+
+    tmp = mtx[0][1];
+    mtx[0][1] = mtx[1][0];
+    mtx[1][0] = tmp;
+
+    tmp = mtx[0][2];
+    mtx[0][2] = mtx[2][0];
+    mtx[2][0] = tmp;
+
+    tmp = mtx[1][2];
+    mtx[1][2] = mtx[2][1];
+    mtx[2][1] = tmp;
+}
+
+#ifdef __NEON_OPT
+void MathInitNeon();
+#endif
+
+#endif
+
diff --git a/source/gles2n64/src/3DMathNeon.cpp b/source/gles2n64/src/3DMathNeon.cpp
new file mode 100644
index 0000000..41524b6
--- /dev/null
+++ b/source/gles2n64/src/3DMathNeon.cpp
@@ -0,0 +1,133 @@
+#include "3DMath.h"
+// NEON 4x4 multiply: dest[r][c] = sum over k of m1[r][k] * m0[k][c].
+static void MultMatrix_neon( float m0[4][4], float m1[4][4], float dest[4][4])
+{
+    asm volatile (
+    "vld1.32 {d0, d1}, [%1]! \n\t" //q0 = m1
+    "vld1.32 {d2, d3}, [%1]! \n\t" //q1 = m1+4
+    "vld1.32 {d4, d5}, [%1]! \n\t" //q2 = m1+8
+    "vld1.32 {d6, d7}, [%1] \n\t" //q3 = m1+12
+    "vld1.32 {d16, d17}, [%0]! \n\t" //q8 = m0
+    "vld1.32 {d18, d19}, [%0]! \n\t" //q9 = m0+4
+    "vld1.32 {d20, d21}, [%0]! \n\t" //q10 = m0+8
+    "vld1.32 {d22, d23}, [%0] \n\t" //q11 = m0+12
+
+    "vmul.f32 q12, q8, d0[0] \n\t" //q12 = q8 * d0[0]
+    "vmul.f32 q13, q8, d2[0] \n\t" //q13 = q8 * d2[0]
+    "vmul.f32 q14, q8, d4[0] \n\t" //q14 = q8 * d4[0]
+    "vmul.f32 q15, q8, d6[0] \n\t" //q15 = q8 * d6[0]
+    "vmla.f32 q12, q9, d0[1] \n\t" //q12 += q9 * d0[1]
+    "vmla.f32 q13, q9, d2[1] \n\t" //q13 += q9 * d2[1]
+    "vmla.f32 q14, q9, d4[1] \n\t" //q14 += q9 * d4[1]
+    "vmla.f32 q15, q9, d6[1] \n\t" //q15 += q9 * d6[1]
+    "vmla.f32 q12, q10, d1[0] \n\t" //q12 += q10 * d1[0]
+    "vmla.f32 q13, q10, d3[0] \n\t" //q13 += q10 * d3[0]
+    "vmla.f32 q14, q10, d5[0] \n\t" //q14 += q10 * d5[0]
+    "vmla.f32 q15, q10, d7[0] \n\t" //q15 += q10 * d7[0]
+    "vmla.f32 q12, q11, d1[1] \n\t" //q12 += q11 * d1[1]
+    "vmla.f32 q13, q11, d3[1] \n\t" //q13 += q11 * d3[1]
+    "vmla.f32 q14, q11, d5[1] \n\t" //q14 += q11 * d5[1]
+    "vmla.f32 q15, q11, d7[1] \n\t" //q15 += q11 * d7[1]
+
+    "vst1.32 {d24, d25}, [%2]! \n\t" //d = q12
+    "vst1.32 {d26, d27}, [%2]! \n\t" //d+4 = q13
+    "vst1.32 {d28, d29}, [%2]! \n\t" //d+8 = q14
+    "vst1.32 {d30, d31}, [%2] \n\t" //d+12 = q15
+
+    :"+r"(m0), "+r"(m1), "+r"(dest):
+    : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
+    "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
+    "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31",
+    "memory"
+    );
+}
+// NEON transform + normalize of vec by the 3x3 part of mtx; no zero-length guard here.
+static void TransformVectorNormalize_neon(float vec[3], float mtx[4][4])
+{
+    asm volatile (
+    "vld1.32 {d0}, [%1] \n\t" //d0 = {vec[0], vec[1]}
+    "flds s2, [%1, #8] \n\t" //d1[0] = vec[2]
+    "vld1.32 {d18, d19}, [%0]! \n\t" //q9 = mtx[0]
+    "vld1.32 {d20, d21}, [%0]! \n\t" //q10 = mtx[1]
+    "vld1.32 {d22, d23}, [%0] \n\t" //q11 = mtx[2]
+
+    "vmul.f32 q2, q9, d0[0] \n\t" //q2 = q9 * vec[0]
+    "vmla.f32 q2, q10, d0[1] \n\t" //q2 += q10 * vec[1]
+    "vmla.f32 q2, q11, d1[0] \n\t" //q2 += q11 * vec[2]
+
+    "vmul.f32 d0, d4, d4 \n\t" //d0 = d4*d4
+    "vpadd.f32 d0, d0, d0 \n\t" //d0 = d[0] + d[1]
+    "vmla.f32 d0, d5, d5 \n\t" //d0 = d0 + d5*d5 (only lane 0 is used below)
+
+    "vmov.f32 d1, d0 \n\t" //d1 = d0
+    "vrsqrte.f32 d0, d0 \n\t" //d0 = ~ 1.0 / sqrt(d0)
+    "vmul.f32 d2, d0, d1 \n\t" //d2 = d0 * d1
+    "vrsqrts.f32 d3, d2, d0 \n\t" //d3 = (3 - d0 * d2) / 2
+    "vmul.f32 d0, d0, d3 \n\t" //d0 = d0 * d3
+    "vmul.f32 d2, d0, d1 \n\t" //d2 = d0 * d1
+    "vrsqrts.f32 d3, d2, d0 \n\t" //d3 = (3 - d0 * d2) / 2
+    "vmul.f32 d0, d0, d3 \n\t" //d0 = d0 * d3
+
+    "vmul.f32 q2, q2, d0[0] \n\t" //q2 = q2 * d0[0]
+
+    "vst1.32 {d4}, [%1] \n\t" //vec[0], vec[1] = d4
+    "fsts s10, [%1, #8] \n\t" //vec[2] = d5[0]
+    : "+r"(mtx): "r"(vec)
+    : "d0","d1","d2","d3","d4","d5","d18","d19","d20","d21","d22", "d23", "memory"
+    );
+}
+// NEON in-place normalize of v; no zero-length guard here.
+static void Normalize_neon(float v[3])
+{
+    asm volatile (
+    "vld1.32 {d4}, [%0]! \n\t" //d4={x,y}
+    "flds s10, [%0] \n\t" //d5[0] = z
+    "sub %0, %0, #8 \n\t" //rewind %0 to &v[0]
+    "vmul.f32 d0, d4, d4 \n\t" //d0= d4*d4
+    "vpadd.f32 d0, d0, d0 \n\t" //d0 = d[0] + d[1]
+    "vmla.f32 d0, d5, d5 \n\t" //d0 = d0 + d5*d5
+
+    "vmov.f32 d1, d0 \n\t" //d1 = d0
+    "vrsqrte.f32 d0, d0 \n\t" //d0 = ~ 1.0 / sqrt(d0)
+    "vmul.f32 d2, d0, d1 \n\t" //d2 = d0 * d1
+    "vrsqrts.f32 d3, d2, d0 \n\t" //d3 = (3 - d0 * d2) / 2
+    "vmul.f32 d0, d0, d3 \n\t" //d0 = d0 * d3
+    "vmul.f32 d2, d0, d1 \n\t" //d2 = d0 * d1
+    "vrsqrts.f32 d3, d2, d0 \n\t" //d3 = (3 - d0 * d2) / 2
+    "vmul.f32 d0, d0, d3 \n\t" //d0 = d0 * d3
+
+    "vmul.f32 q2, q2, d0[0] \n\t" //q2 = q2 * d0[0]
+    "vst1.32 {d4}, [%0]! \n\t" //v[0], v[1] = d4
+    "fsts s10, [%0] \n\t" //v[2] = d5[0]
+
+    :"+r"(v) :
+    : "d0", "d1", "d2", "d3", "d4", "d5", "memory"
+    );
+}
+
+static float DotProduct_neon( float v0[3], float v1[3] )
+{
+    float dot;
+    asm volatile (
+    "vld1.32 {d8}, [%1]! 
\n\t" //d8={x0,y0} + "vld1.32 {d10}, [%2]! \n\t" //d10={x1,y1} + "flds s18, [%1, #0] \n\t" //d9[0]={z0} + "flds s22, [%2, #0] \n\t" //d11[0]={z1} + "vmul.f32 d12, d8, d10 \n\t" //d0= d2*d4 + "vpadd.f32 d12, d12, d12 \n\t" //d0 = d[0] + d[1] + "vmla.f32 d12, d9, d11 \n\t" //d0 = d0 + d3*d5 + "fmrs %0, s24 \n\t" //r0 = s0 + : "=r"(dot), "+r"(v0), "+r"(v1): + : "d8", "d9", "d10", "d11", "d12" + + ); + return dot; +} + +void MathInitNeon() +{ + MultMatrix = MultMatrix_neon; + TransformVectorNormalize = TransformVectorNormalize_neon; + Normalize = Normalize_neon; + DotProduct = DotProduct_neon; +} diff --git a/source/gles2n64/src/COPYING b/source/gles2n64/src/COPYING new file mode 100644 index 0000000..518eaa2 --- /dev/null +++ b/source/gles2n64/src/COPYING @@ -0,0 +1,172 @@ +This directory contains the source code of gles2n64 ported to Android +by yongzh (freeman.yong@gmail.com). The original source code is available at: + +http://code.google.com/p/gles2n64/ + + + + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. 
+Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. 
You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. 
A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. 
#define CRC32_POLYNOMIAL     0x04C11DB7

// Lookup table(s).  With __CRC_OPT three extra 256-entry tables follow the
// basic one so CRC_Calculate can consume four input bytes per step.
#ifdef __CRC_OPT
unsigned int CRCTable[ 256 * 4];
#else
unsigned int CRCTable[ 256 ];
#endif

// Reverse the order of the low `ch` bits of `ref` (bit 0 <-> bit ch-1, ...).
u32 Reflect( u32 ref, char ch )
{
    u32 value = 0;

    // Swap bit 0 for bit 7
    // bit 1 for bit 6, etc.
    for (int i = 1; i < (ch + 1); i++)
    {
        if(ref & 1)
            value |= (u32)1 << (ch - i);   // unsigned: ch - i reaches 31 when ch == 32
        ref >>= 1;
    }
    return value;
}

// Fill CRCTable.  Must be called once before any CRC_Calculate* call.
void CRC_BuildTable()
{
    u32 crc;

    for (int i = 0; i < 256; i++)
    {
        crc = Reflect( i, 8 ) << 24;
        for (int j = 0; j < 8; j++)
            crc = (crc << 1) ^ ((crc & 0x80000000u) ? CRC32_POLYNOMIAL : 0);

        CRCTable[i] = Reflect( crc, 32 );
    }

#ifdef __CRC_OPT
    // Table j+1 advances the entries of table j by one more zero byte.
    for (int i = 0; i < 256; i++)
    {
        for(int j = 0; j < 3; j++)
        {
            CRCTable[256*(j+1) + i] = (CRCTable[256*j + i]>>8) ^ CRCTable[CRCTable[256*j + i]&0xFF];
        }
    }
#endif

}

// Run `count` bytes of `buffer` through the CRC register starting from `crc`
// and return the final register XORed with the starting value.
u32 CRC_Calculate( u32 crc, void *buffer, u32 count )
{
    u8 *p;
    u32 orig = crc;

    p = (u8*) buffer;

#ifdef __CRC_OPT
    while(count > 3)
    {
        // Assemble the word byte by byte (least significant byte first)
        // instead of dereferencing a casted pointer: no alignment or aliasing
        // requirement on `buffer`, and the same value on any byte order.
        crc ^= (u32)p[0] | ((u32)p[1] << 8) | ((u32)p[2] << 16) | ((u32)p[3] << 24);
        p += 4;
        crc = CRCTable[3*256 + (crc&0xFF)]
            ^ CRCTable[2*256 + ((crc>>8)&0xFF)]
            ^ CRCTable[1*256 + ((crc>>16)&0xFF)]
            ^ CRCTable[0*256 + ((crc>>24))];

        count -= 4;
    }
#endif

    while (count--)
        crc = (crc >> 8) ^ CRCTable[(crc & 0xFF) ^ *p++];

    return crc ^ orig;
}

// Like CRC_Calculate, but `count` is a number of 8-byte records and only the
// first two bytes of each record are fed into the CRC.
u32 CRC_CalculatePalette( u32 crc, void *buffer, u32 count )
{
    u8 *p;
    u32 orig = crc;

    p = (u8*) buffer;
    while (count--)
    {
        crc = (crc >> 8) ^ CRCTable[(crc & 0xFF) ^ *p++];
        crc = (crc >> 8) ^ CRCTable[(crc & 0xFF) ^ *p++];

        p += 6;   // skip the remaining six bytes of the record
    }

    return crc ^ orig;
}
\ + if (A <= LOG_LEVEL) \ + { \ + __android_log_print(ANDROID_LOG_DEBUG, "gles2n64", __VA_ARGS__); \ + } + +#else + +#define LOG(A, ...) + +#endif + +#endif diff --git a/source/gles2n64/src/Config.cpp b/source/gles2n64/src/Config.cpp new file mode 100644 index 0000000..c34ff6c --- /dev/null +++ b/source/gles2n64/src/Config.cpp @@ -0,0 +1,307 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Mupen64plus - Config_nogui.cpp * + * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ * + * Copyright (C) 2008 Tillin9 * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
// One row of the configOptions table.
struct Option
{
    // Text written to the config file before '='.  Rows whose data pointer is
    // NULL are emitted as the name alone ('#' headings and blank lines).
    const char* name;
    // Field of the global `config` that this row loads/saves, or NULL.
    int* data;
    // Value Config_SetDefault() stores into *data.
    const int initial;
};
&config.texture.sai2x, 0}, + {"texture force bilinear", &config.texture.forceBilinear, 0}, + {"texture max anisotropy", &config.texture.maxAnisotropy, 0}, + {"texture use IA", &config.texture.useIA, 0}, + {"texture fast CRC", &config.texture.fastCRC, 1}, + {"texture pow2", &config.texture.pow2, 1}, + {"", NULL, 0}, + + {"#Frame skip:", NULL, 0}, + {"auto frameskip", &config.autoFrameSkip, 0}, + {"max frameskip", &config.maxFrameSkip, 0}, + {"target FPS", &config.targetFPS, 20}, + {"frame render rate", &config.frameRenderRate, 1}, + {"vertical sync", &config.verticalSync, 0}, + {"", NULL, 0}, + + {"#Other Settings:", NULL, 0}, + {"update mode", &config.updateMode, SCREEN_UPDATE_AT_VI_UPDATE }, + {"ignore offscreen rendering", &config.ignoreOffscreenRendering, 0}, + {"force screen clear", &config.forceBufferClear, 0}, + {"flip vertical", &config.screen.flipVertical, 0}, +// paulscode: removed from pre-compile to a config option +//// (part of the Galaxy S Zelda crash-fix + {"tribuffer opt", &config.tribufferOpt, 1}, +// + {"", NULL, 0}, + + {"#Hack Settings:", NULL, 0}, + {"hack banjo tooie", &config.hackBanjoTooie, 0}, + {"hack zelda", &config.hackZelda, 0}, + {"hack alpha", &config.hackAlpha, 0}, + {"hack z", &config.zHack, 0}, + +}; + +const int configOptionsSize = sizeof(configOptions) / sizeof(Option); + +void Config_WriteConfig(const char *filename) +{ + config.version = CONFIG_VERSION; + FILE* f = fopen(filename, "w"); + if (!f) + { + LOG(LOG_ERROR, "Could Not Open %s for writing\n", filename); + } + + for(int i=0; iname); + if (o->data) fprintf(f,"=%i", *(o->data)); + fprintf(f, "\n"); + } + + + fclose(f); +} + +void Config_SetDefault() +{ + for(int i=0; i < configOptionsSize; i++) + { + Option *o = &configOptions[i]; + if (o->data) *(o->data) = o->initial; + } +} + +void Config_SetOption(char* line, char* val) +{ + for(int i=0; i< configOptionsSize; i++) + { + Option *o = &configOptions[i]; + if (strcasecmp(line, o->name) == 0) + { + if (o->data) + { + int v 
= atoi(val); + *(o->data) = v; + LOG(LOG_VERBOSE, "Config Option: %s = %i\n", o->name, v); + } + break; + } + } +} + +void Config_LoadRomConfig(unsigned char* header) +{ + char line[4096]; + + // get the name of the ROM + for (int i=0; i<20; i++) config.romName[i] = header[0x20+i]; + config.romName[20] = '\0'; + while (config.romName[strlen(config.romName)-1] == ' ') + { + config.romName[strlen(config.romName)-1] = '\0'; + } + + switch(header[0x3e]) + { + // PAL codes + case 0x44: + case 0x46: + case 0x49: + case 0x50: + case 0x53: + case 0x55: + case 0x58: + case 0x59: + config.romPAL = true; + break; + + // NTSC codes + case 0x37: + case 0x41: + case 0x45: + case 0x4a: + config.romPAL = false; + break; + + // Fallback for unknown codes + default: + config.romPAL = false; + } + + LOG(LOG_MINIMAL, "Rom is %s\n", config.romPAL ? "PAL" : "NTSC"); + + const char *filename = ConfigGetSharedDataFilepath("gles2n64rom.conf"); + FILE *f = fopen(filename,"r"); + if (!f) + { + LOG(LOG_MINIMAL, "Could not find %s Rom settings file, using global.\n", filename); + return; + } + else + { + LOG(LOG_MINIMAL, "[gles2N64]: Searching %s Database for \"%s\" ROM\n", filename, config.romName); + bool isRom = false; + while (!feof(f)) + { + fgets(line, 4096, f); + if (line[0] == '\n') continue; + + if (strncmp(line,"rom name=", 9) == 0) + { + //Depending on the editor, end lines could be terminated by "LF" or "CRLF" + char* lf = strchr(line, '\n'); //Line Feed + char* cr = strchr(line, '\r'); //Carriage Return + if (lf) *lf='\0'; + if (cr) *cr='\0'; + isRom = (strcasecmp(config.romName, line+9) == 0); + } + else + { + if (isRom) + { + char* val = strchr(line, '='); + if (!val) continue; + *val++ = '\0'; + Config_SetOption(line,val); + LOG(LOG_MINIMAL, "%s = %s", line, val); + } + } + } + } + + fclose(f); +} + +void Config_LoadConfig() +{ + FILE *f; + char line[4096]; + + // default configuration + Config_SetDefault(); + + // read configuration + const char *filename = 
#ifndef CONFIG_H
#define CONFIG_H

// All plugin settings.  One global instance (`config`) is defined in
// Config.cpp; the int fields listed in that file's configOptions table are
// loaded from / written to the config file under the option names quoted
// below.  Fields without a quoted name are not part of that table.
struct Config
{
    // "config version"; Config_LoadConfig rewrites the file with defaults
    // when this is lower than CONFIG_VERSION.
    int version;

    struct
    {
        int flipVertical;   // "flip vertical"
    } screen;

    // "window xpos|ypos|width|height|refwidth|refheight"
    struct
    {
        int xpos, ypos, width, height, refwidth, refheight;
    } window;

    // "framebuffer bilinear|width|height"; enable, xpos and ypos have no
    // active table entry.
    struct
    {
        int enable, bilinear;
        int xpos, ypos, width, height;
    } framebuffer;

    // "video force|width|height"
    struct
    {
        int force, width, height;
    } video;

    struct
    {
        int maxAnisotropy;  // "texture max anisotropy"
        int enableMipmap;   // no table entry
        int forceBilinear;  // "texture force bilinear"
        int sai2x;          // "texture 2xSAI"
        int useIA;          // "texture use IA"
        int fastCRC;        // "texture fast CRC"
        int pow2;           // "texture pow2"
    } texture;

    int logFrameRate;               // no table entry
    int updateMode;                 // "update mode"
    int forceBufferClear;           // "force screen clear"
    int ignoreOffscreenRendering;   // "ignore offscreen rendering"
    int zHack;                      // "hack z"

    int autoFrameSkip;      // "auto frameskip"
    int maxFrameSkip;       // "max frameskip"
    int targetFPS;          // "target FPS"
    int frameRenderRate;    // "frame render rate"
    int verticalSync;       // "vertical sync"

    int enableFog;          // "enable fog"
    int enablePrimZ;        // "enable primitive z"
    int enableLighting;     // "enable lighting"
    int enableAlphaTest;    // "enable alpha test"
    int enableClipping;     // "enable clipping"
    int enableFaceCulling;  // "enable face culling"
    int enableNoise;        // "enable noise"

// paulscode: removed from pre-compile to a config option
//// (part of the Galaxy S Zelda crash-fix
    int tribufferOpt;       // "tribuffer opt"
//

    int hackBanjoTooie;     // "hack banjo tooie"
    int hackZelda;          // "hack zelda"
    int hackAlpha;          // "hack alpha"

    bool stretchVideo;      // no table entry
    bool romPAL; //is the rom PAL (set by Config_LoadRomConfig from header byte 0x3e)
    // ROM name: 20 bytes copied from header offset 0x20 by
    // Config_LoadRomConfig, trailing spaces removed, NUL-terminated.
    char romName[21];
};

extern Config config;

// Reset to defaults, then read gles2n64.conf (writing a fresh file when it
// cannot be opened).
void Config_LoadConfig();
// Fill romName/romPAL from `header` and apply per-ROM overrides from
// gles2n64rom.conf.
void Config_LoadRomConfig(unsigned char* header);
#endif
) printf(format, ## __VA_ARGS__) +#define DebugRSPState(pci, pc, cmd, w0, w1) + +#endif // DEBUG_H + diff --git a/source/gles2n64/src/DepthBuffer.cpp b/source/gles2n64/src/DepthBuffer.cpp new file mode 100644 index 0000000..8f2ebc1 --- /dev/null +++ b/source/gles2n64/src/DepthBuffer.cpp @@ -0,0 +1,165 @@ +#include +#include "DepthBuffer.h" +#include "Types.h" + +DepthBufferInfo depthBuffer; + +void DepthBuffer_Init() +{ + depthBuffer.current = NULL; + depthBuffer.top = NULL; + depthBuffer.bottom = NULL; + depthBuffer.numBuffers = 0; +} + +void DepthBuffer_RemoveBottom() +{ + DepthBuffer *newBottom = depthBuffer.bottom->higher; + + if (depthBuffer.bottom == depthBuffer.top) + depthBuffer.top = NULL; + + free( depthBuffer.bottom ); + + depthBuffer.bottom = newBottom; + + if (depthBuffer.bottom != NULL) + depthBuffer.bottom->lower = NULL; + + depthBuffer.numBuffers--; +} + +void DepthBuffer_Remove( DepthBuffer *buffer ) +{ + if ((buffer == depthBuffer.bottom) && + (buffer == depthBuffer.top)) + { + depthBuffer.top = NULL; + depthBuffer.bottom = NULL; + } + else if (buffer == depthBuffer.bottom) + { + depthBuffer.bottom = buffer->higher; + + if (depthBuffer.bottom) + depthBuffer.bottom->lower = NULL; + } + else if (buffer == depthBuffer.top) + { + depthBuffer.top = buffer->lower; + + if (depthBuffer.top) + depthBuffer.top->higher = NULL; + } + else + { + buffer->higher->lower = buffer->lower; + buffer->lower->higher = buffer->higher; + } + + free( buffer ); + depthBuffer.numBuffers--; +} + +void DepthBuffer_RemoveBuffer( u32 address ) +{ + DepthBuffer *current = depthBuffer.bottom; + while (current != NULL) + { + if (current->address == address) + { + DepthBuffer_Remove( current ); + return; + } + current = current->higher; + } +} + +DepthBuffer *DepthBuffer_AddTop() +{ + DepthBuffer *newtop = (DepthBuffer*)malloc( sizeof( DepthBuffer ) ); + + newtop->lower = depthBuffer.top; + newtop->higher = NULL; + + if (depthBuffer.top) + depthBuffer.top->higher = newtop; + + if 
(!depthBuffer.bottom) + depthBuffer.bottom = newtop; + + depthBuffer.top = newtop; + + depthBuffer.numBuffers++; + + return newtop; +} + +void DepthBuffer_MoveToTop( DepthBuffer *newtop ) +{ + if (newtop == depthBuffer.top) + return; + + if (newtop == depthBuffer.bottom) + { + depthBuffer.bottom = newtop->higher; + depthBuffer.bottom->lower = NULL; + } + else + { + newtop->higher->lower = newtop->lower; + newtop->lower->higher = newtop->higher; + } + + newtop->higher = NULL; + newtop->lower = depthBuffer.top; + depthBuffer.top->higher = newtop; + depthBuffer.top = newtop; +} + +void DepthBuffer_Destroy() +{ + while (depthBuffer.bottom) + DepthBuffer_RemoveBottom(); + + depthBuffer.top = NULL; +} + +void DepthBuffer_SetBuffer( u32 address ) +{ + DepthBuffer *current = depthBuffer.top; + + // Search through saved depth buffers + while (current != NULL) + { + if (current->address == address) + { + DepthBuffer_MoveToTop( current ); + depthBuffer.current = current; + return; + } + current = current->lower; + } + + current = DepthBuffer_AddTop(); + + current->address = address; + current->cleared = TRUE; + + depthBuffer.current = current; +} + +DepthBuffer *DepthBuffer_FindBuffer( u32 address ) +{ + DepthBuffer *current = depthBuffer.top; + + while (current) + { + if (current->address == address) + return current; + current = current->lower; + } + + return NULL; +} + diff --git a/source/gles2n64/src/DepthBuffer.h b/source/gles2n64/src/DepthBuffer.h new file mode 100644 index 0000000..dd7d5e6 --- /dev/null +++ b/source/gles2n64/src/DepthBuffer.h @@ -0,0 +1,28 @@ +#ifndef DEPTHBUFFER_H +#define DEPTHBUFFER_H + +#include "Types.h" + +struct DepthBuffer +{ + DepthBuffer *higher, *lower; + + u32 address, cleared; +}; + +struct DepthBufferInfo +{ + DepthBuffer *top, *bottom, *current; + int numBuffers; +}; + +extern DepthBufferInfo depthBuffer; + +void DepthBuffer_Init(); +void DepthBuffer_Destroy(); +void DepthBuffer_SetBuffer( u32 address ); +void DepthBuffer_RemoveBuffer( 
u32 address ); +DepthBuffer *DepthBuffer_FindBuffer( u32 address ); + +#endif + diff --git a/source/gles2n64/src/F3D.cpp b/source/gles2n64/src/F3D.cpp new file mode 100644 index 0000000..a5524d0 --- /dev/null +++ b/source/gles2n64/src/F3D.cpp @@ -0,0 +1,374 @@ +#include "gles2N64.h" +#include "Debug.h" +#include "F3D.h" +#include "N64.h" +#include "RSP.h" +#include "RDP.h" +#include "gSP.h" +#include "gDP.h" +#include "GBI.h" +#include "OpenGL.h" +#include "DepthBuffer.h" + +#include "Config.h" + +void F3D_SPNoOp( u32 w0, u32 w1 ) +{ + gSPNoOp(); +} + +void F3D_Mtx( u32 w0, u32 w1 ) +{ + if (_SHIFTR( w0, 0, 16 ) != 64) + { +// GBI_DetectUCode(); // Something's wrong +#ifdef DEBUG + DebugMsg( DEBUG_MEDIUM | DEBUG_HIGH | DEBUG_ERROR, "G_MTX: address = 0x%08X length = %i params = 0x%02X\n", w1, _SHIFTR( w0, 0, 16 ), _SHIFTR( w0, 16, 8 ) ); +#endif + return; + } + + gSPMatrix( w1, _SHIFTR( w0, 16, 8 ) ); +} + +void F3D_Reserved0( u32 w0, u32 w1 ) +{ +#ifdef DEBUG + DebugMsg( DEBUG_MEDIUM | DEBUG_IGNORED | DEBUG_UNKNOWN, "G_RESERVED0: w0=0x%08lX w1=0x%08lX\n", w0, w1 ); +#endif +} + +void F3D_MoveMem( u32 w0, u32 w1 ) +{ +#ifdef __TRIBUFFER_OPT + gSPFlushTriangles(); +#endif + switch (_SHIFTR( w0, 16, 8 )) + { + case F3D_MV_VIEWPORT://G_MV_VIEWPORT: + gSPViewport( w1 ); + break; + case G_MV_MATRIX_1: + gSPForceMatrix( w1 ); + // force matrix takes four commands + RSP.PC[RSP.PCi] += 24; + break; + case G_MV_L0: + gSPLight( w1, LIGHT_1 ); + break; + case G_MV_L1: + gSPLight( w1, LIGHT_2 ); + break; + case G_MV_L2: + gSPLight( w1, LIGHT_3 ); + break; + case G_MV_L3: + gSPLight( w1, LIGHT_4 ); + break; + case G_MV_L4: + gSPLight( w1, LIGHT_5 ); + break; + case G_MV_L5: + gSPLight( w1, LIGHT_6 ); + break; + case G_MV_L6: + gSPLight( w1, LIGHT_7 ); + break; + case G_MV_L7: + gSPLight( w1, LIGHT_8 ); + break; + case G_MV_LOOKATX: + break; + case G_MV_LOOKATY: + break; + } +} + +void F3D_Vtx( u32 w0, u32 w1 ) +{ + gSPVertex( w1, _SHIFTR( w0, 20, 4 ) + 1, _SHIFTR( w0, 16, 4 ) ); 
+} + +void F3D_Reserved1( u32 w0, u32 w1 ) +{ +} + +void F3D_DList( u32 w0, u32 w1 ) +{ + switch (_SHIFTR( w0, 16, 8 )) + { + case G_DL_PUSH: + gSPDisplayList( w1 ); + break; + case G_DL_NOPUSH: + gSPBranchList( w1 ); + break; + } + +#ifdef __TRIBUFFER_OPT + //since PCi can be changed in gSPDisplayList + gSPFlushTriangles(); +#endif +} + +void F3D_Reserved2( u32 w0, u32 w1 ) +{ +} + +void F3D_Reserved3( u32 w0, u32 w1 ) +{ +} + +void F3D_Sprite2D_Base( u32 w0, u32 w1 ) +{ + //gSPSprite2DBase( w1 ); + RSP.PC[RSP.PCi] += 8; +} + + +void F3D_Tri1( u32 w0, u32 w1 ) +{ + gSP1Triangle( _SHIFTR( w1, 16, 8 ) / 10, _SHIFTR( w1, 8, 8 ) / 10, _SHIFTR( w1, 0, 8 ) / 10); +} + +void F3D_CullDL( u32 w0, u32 w1 ) +{ + gSPCullDisplayList( _SHIFTR( w0, 0, 24 ) / 40, (w1 / 40) - 1 ); +} + +void F3D_PopMtx( u32 w0, u32 w1 ) +{ + gSPPopMatrix( w1 ); +} + +void F3D_MoveWord( u32 w0, u32 w1 ) +{ + switch (_SHIFTR( w0, 0, 8 )) + { + case G_MW_MATRIX: + gSPInsertMatrix( _SHIFTR( w0, 8, 16 ), w1 ); + break; + + case G_MW_NUMLIGHT: + gSPNumLights( ((w1 - 0x80000000) >> 5) - 1 ); + break; + + case G_MW_CLIP: + gSPClipRatio( w1 ); + break; + + case G_MW_SEGMENT: + gSPSegment( _SHIFTR( w0, 8, 16 ) >> 2, w1 & 0x00FFFFFF ); + break; + + case G_MW_FOG: + gSPFogFactor( (s16)_SHIFTR( w1, 16, 16 ), (s16)_SHIFTR( w1, 0, 16 ) ); + break; + + case G_MW_LIGHTCOL: + switch (_SHIFTR( w0, 8, 16 )) + { + case F3D_MWO_aLIGHT_1: + gSPLightColor( LIGHT_1, w1 ); + break; + case F3D_MWO_aLIGHT_2: + gSPLightColor( LIGHT_2, w1 ); + break; + case F3D_MWO_aLIGHT_3: + gSPLightColor( LIGHT_3, w1 ); + break; + case F3D_MWO_aLIGHT_4: + gSPLightColor( LIGHT_4, w1 ); + break; + case F3D_MWO_aLIGHT_5: + gSPLightColor( LIGHT_5, w1 ); + break; + case F3D_MWO_aLIGHT_6: + gSPLightColor( LIGHT_6, w1 ); + break; + case F3D_MWO_aLIGHT_7: + gSPLightColor( LIGHT_7, w1 ); + break; + case F3D_MWO_aLIGHT_8: + gSPLightColor( LIGHT_8, w1 ); + break; + } + break; + case G_MW_POINTS: + gSPModifyVertex( _SHIFTR( w0, 8, 16 ) / 40, _SHIFTR( 
w0, 8, 16 ) % 40, w1 ); /* the MoveWord offset field packs vertex * 40 + field offset; the low byte of w0 is the G_MW_* index already matched by this switch, so both parts come from the offset field */ + break; + case G_MW_PERSPNORM: + gSPPerspNormalize( w1 ); + break; + } +} + +void F3D_Texture( u32 w0, u32 w1 ) +{ + gSPTexture( _FIXED2FLOAT( _SHIFTR( w1, 16, 16 ), 16 ), + _FIXED2FLOAT( _SHIFTR( w1, 0, 16 ), 16 ), + _SHIFTR( w0, 11, 3 ), + _SHIFTR( w0, 8, 3 ), + _SHIFTR( w0, 0, 8 ) ); +} + +void F3D_SetOtherMode_H( u32 w0, u32 w1 ) +{ + switch (_SHIFTR( w0, 8, 8 )) + { + case G_MDSFT_PIPELINE: + gDPPipelineMode( w1 >> G_MDSFT_PIPELINE ); + break; + case G_MDSFT_CYCLETYPE: + gDPSetCycleType( w1 >> G_MDSFT_CYCLETYPE ); + break; + case G_MDSFT_TEXTPERSP: + gDPSetTexturePersp( w1 >> G_MDSFT_TEXTPERSP ); + break; + case G_MDSFT_TEXTDETAIL: + gDPSetTextureDetail( w1 >> G_MDSFT_TEXTDETAIL ); + break; + case G_MDSFT_TEXTLOD: + gDPSetTextureLOD( w1 >> G_MDSFT_TEXTLOD ); + break; + case G_MDSFT_TEXTLUT: + gDPSetTextureLUT( w1 >> G_MDSFT_TEXTLUT ); + break; + case G_MDSFT_TEXTFILT: + gDPSetTextureFilter( w1 >> G_MDSFT_TEXTFILT ); + break; + case G_MDSFT_TEXTCONV: + gDPSetTextureConvert( w1 >> G_MDSFT_TEXTCONV ); + break; + case G_MDSFT_COMBKEY: + gDPSetCombineKey( w1 >> G_MDSFT_COMBKEY ); + break; + case G_MDSFT_RGBDITHER: + gDPSetColorDither( w1 >> G_MDSFT_RGBDITHER ); + break; + case G_MDSFT_ALPHADITHER: + gDPSetAlphaDither( w1 >> G_MDSFT_ALPHADITHER ); + break; + default: + u32 shift = _SHIFTR( w0, 8, 8 ); + u32 length = _SHIFTR( w0, 0, 8 ); + u32 mask = ((1 << length) - 1) << shift; + + gDP.otherMode.h &= ~mask; + gDP.otherMode.h |= w1 & mask; + + gDP.changed |= CHANGED_CYCLETYPE; + break; + } +} + +void F3D_SetOtherMode_L( u32 w0, u32 w1 ) +{ + switch (_SHIFTR( w0, 8, 8 )) + { + case G_MDSFT_ALPHACOMPARE: + gDPSetAlphaCompare( w1 >> G_MDSFT_ALPHACOMPARE ); + break; + case G_MDSFT_ZSRCSEL: + gDPSetDepthSource( w1 >> G_MDSFT_ZSRCSEL ); + break; + case G_MDSFT_RENDERMODE: + gDPSetRenderMode( w1 & 0xCCCCFFFF, w1 & 0x3333FFFF ); + break; + default: + u32 shift = _SHIFTR( w0, 8, 8 ); + u32 length = _SHIFTR( w0, 0, 8 ); + u32 mask = ((1 << length) - 1) 
<< shift; + + gDP.otherMode.l &= ~mask; + gDP.otherMode.l |= w1 & mask; + + gDP.changed |= CHANGED_RENDERMODE | CHANGED_ALPHACOMPARE; + break; + } +} + +void F3D_EndDL( u32 w0, u32 w1 ) +{ + gSPEndDisplayList(); +} + +void F3D_SetGeometryMode( u32 w0, u32 w1 ) +{ + gSPSetGeometryMode( w1 ); +} + +void F3D_ClearGeometryMode( u32 w0, u32 w1 ) +{ + gSPClearGeometryMode( w1 ); +} + +void F3D_Line3D( u32 w0, u32 w1 ) +{ + // Hmmm... +} + +void F3D_Quad( u32 w0, u32 w1 ) +{ + gSP1Quadrangle( _SHIFTR( w1, 24, 8 ) / 10, _SHIFTR( w1, 16, 8 ) / 10, _SHIFTR( w1, 8, 8 ) / 10, _SHIFTR( w1, 0, 8 ) / 10 ); +} + +void F3D_RDPHalf_1( u32 w0, u32 w1 ) +{ + gDP.half_1 = w1; +} + +void F3D_RDPHalf_2( u32 w0, u32 w1 ) +{ + gDP.half_2 = w1; +} + +void F3D_RDPHalf_Cont( u32 w0, u32 w1 ) +{ +} + +void F3D_Tri4( u32 w0, u32 w1 ) +{ + gSP4Triangles( _SHIFTR( w0, 0, 4 ), _SHIFTR( w1, 0, 4 ), _SHIFTR( w1, 4, 4 ), + _SHIFTR( w0, 4, 4 ), _SHIFTR( w1, 8, 4 ), _SHIFTR( w1, 12, 4 ), + _SHIFTR( w0, 8, 4 ), _SHIFTR( w1, 16, 4 ), _SHIFTR( w1, 20, 4 ), + _SHIFTR( w0, 12, 4 ), _SHIFTR( w1, 24, 4 ), _SHIFTR( w1, 28, 4 ) ); +} + +void F3D_Init() +{ + // Set GeometryMode flags + GBI_InitFlags( F3D ); + + GBI.PCStackSize = 10; + + // GBI Command Command Value Command Function + GBI_SetGBI( G_SPNOOP, F3D_SPNOOP, F3D_SPNoOp ); + GBI_SetGBI( G_MTX, F3D_MTX, F3D_Mtx ); + GBI_SetGBI( G_RESERVED0, F3D_RESERVED0, F3D_Reserved0 ); + GBI_SetGBI( G_MOVEMEM, F3D_MOVEMEM, F3D_MoveMem ); + GBI_SetGBI( G_VTX, F3D_VTX, F3D_Vtx ); + GBI_SetGBI( G_RESERVED1, F3D_RESERVED1, F3D_Reserved1 ); + GBI_SetGBI( G_DL, F3D_DL, F3D_DList ); + GBI_SetGBI( G_RESERVED2, F3D_RESERVED2, F3D_Reserved2 ); + GBI_SetGBI( G_RESERVED3, F3D_RESERVED3, F3D_Reserved3 ); + GBI_SetGBI( G_SPRITE2D_BASE, F3D_SPRITE2D_BASE, F3D_Sprite2D_Base ); + + GBI_SetGBI( G_TRI1, F3D_TRI1, F3D_Tri1 ); + GBI_SetGBI( G_CULLDL, F3D_CULLDL, F3D_CullDL ); + GBI_SetGBI( G_POPMTX, F3D_POPMTX, F3D_PopMtx ); + GBI_SetGBI( G_MOVEWORD, F3D_MOVEWORD, F3D_MoveWord ); + 
GBI_SetGBI( G_TEXTURE, F3D_TEXTURE, F3D_Texture ); + GBI_SetGBI( G_SETOTHERMODE_H, F3D_SETOTHERMODE_H, F3D_SetOtherMode_H ); + GBI_SetGBI( G_SETOTHERMODE_L, F3D_SETOTHERMODE_L, F3D_SetOtherMode_L ); + GBI_SetGBI( G_ENDDL, F3D_ENDDL, F3D_EndDL ); + GBI_SetGBI( G_SETGEOMETRYMODE, F3D_SETGEOMETRYMODE, F3D_SetGeometryMode ); + GBI_SetGBI( G_CLEARGEOMETRYMODE, F3D_CLEARGEOMETRYMODE, F3D_ClearGeometryMode ); + GBI_SetGBI( G_QUAD, F3D_QUAD, F3D_Quad ); + GBI_SetGBI( G_RDPHALF_1, F3D_RDPHALF_1, F3D_RDPHalf_1 ); + GBI_SetGBI( G_RDPHALF_2, F3D_RDPHALF_2, F3D_RDPHalf_2 ); + GBI_SetGBI( G_RDPHALF_CONT, F3D_RDPHALF_CONT, F3D_RDPHalf_Cont ); + GBI_SetGBI( G_TRI4, F3D_TRI4, F3D_Tri4 ); + +} + diff --git a/source/gles2n64/src/F3D.h b/source/gles2n64/src/F3D.h new file mode 100644 index 0000000..6efb49c --- /dev/null +++ b/source/gles2n64/src/F3D.h @@ -0,0 +1,99 @@ +#ifndef F3D_H +#define F3D_H +#include "Types.h" + +#define F3D_MTX_STACKSIZE 10 + +#define F3D_MTX_MODELVIEW 0x00 +#define F3D_MTX_PROJECTION 0x01 +#define F3D_MTX_MUL 0x00 +#define F3D_MTX_LOAD 0x02 +#define F3D_MTX_NOPUSH 0x00 +#define F3D_MTX_PUSH 0x04 + +#define F3D_TEXTURE_ENABLE 0x00000002 +#define F3D_SHADING_SMOOTH 0x00000200 +#define F3D_CULL_FRONT 0x00001000 +#define F3D_CULL_BACK 0x00002000 +#define F3D_CULL_BOTH 0x00003000 +#define F3D_CLIPPING 0x00000000 + +#define F3D_MV_VIEWPORT 0x80 + +#define F3D_MWO_aLIGHT_1 0x00 +#define F3D_MWO_bLIGHT_1 0x04 +#define F3D_MWO_aLIGHT_2 0x20 +#define F3D_MWO_bLIGHT_2 0x24 +#define F3D_MWO_aLIGHT_3 0x40 +#define F3D_MWO_bLIGHT_3 0x44 +#define F3D_MWO_aLIGHT_4 0x60 +#define F3D_MWO_bLIGHT_4 0x64 +#define F3D_MWO_aLIGHT_5 0x80 +#define F3D_MWO_bLIGHT_5 0x84 +#define F3D_MWO_aLIGHT_6 0xa0 +#define F3D_MWO_bLIGHT_6 0xa4 +#define F3D_MWO_aLIGHT_7 0xc0 +#define F3D_MWO_bLIGHT_7 0xc4 +#define F3D_MWO_aLIGHT_8 0xe0 +#define F3D_MWO_bLIGHT_8 0xe4 + +// FAST3D commands +#define F3D_SPNOOP 0x00 +#define F3D_MTX 0x01 +#define F3D_RESERVED0 0x02 +#define F3D_MOVEMEM 0x03 +#define 
F3D_VTX 0x04 +#define F3D_RESERVED1 0x05 +#define F3D_DL 0x06 +#define F3D_RESERVED2 0x07 +#define F3D_RESERVED3 0x08 +#define F3D_SPRITE2D_BASE 0x09 + +#define F3D_TRI1 0xBF +#define F3D_CULLDL 0xBE +#define F3D_POPMTX 0xBD +#define F3D_MOVEWORD 0xBC +#define F3D_TEXTURE 0xBB +#define F3D_SETOTHERMODE_H 0xBA +#define F3D_SETOTHERMODE_L 0xB9 +#define F3D_ENDDL 0xB8 +#define F3D_SETGEOMETRYMODE 0xB7 +#define F3D_CLEARGEOMETRYMODE 0xB6 +//#define F3D_LINE3D 0xB5 // Only used in Line3D +#define F3D_QUAD 0xB5 +#define F3D_RDPHALF_1 0xB4 +#define F3D_RDPHALF_2 0xB3 +#define F3D_RDPHALF_CONT 0xB2 +#define F3D_TRI4 0xB1 + +#define F3D_TRI_UNKNOWN 0xC0 + +void F3D_SPNoOp( u32 w0, u32 w1 ); +void F3D_Mtx( u32 w0, u32 w1 ); +void F3D_Reserved0( u32 w0, u32 w1 ); +void F3D_MoveMem( u32 w0, u32 w1 ); +void F3D_Vtx( u32 w0, u32 w1 ); +void F3D_Reserved1( u32 w0, u32 w1 ); +void F3D_DList( u32 w0, u32 w1 ); +void F3D_Reserved2( u32 w0, u32 w1 ); +void F3D_Reserved3( u32 w0, u32 w1 ); +void F3D_Sprite2D_Base( u32 w0, u32 w1 ); +void F3D_Tri1( u32 w0, u32 w1 ); +void F3D_CullDL( u32 w0, u32 w1 ); +void F3D_PopMtx( u32 w0, u32 w1 ); +void F3D_MoveWord( u32 w0, u32 w1 ); +void F3D_Texture( u32 w0, u32 w1 ); +void F3D_SetOtherMode_H( u32 w0, u32 w1 ); +void F3D_SetOtherMode_L( u32 w0, u32 w1 ); +void F3D_EndDL( u32 w0, u32 w1 ); +void F3D_SetGeometryMode( u32 w0, u32 w1 ); +void F3D_ClearGeometryMode( u32 w0, u32 w1 ); +//void F3D_Line3D( u32 w0, u32 w1 ); +void F3D_Quad( u32 w0, u32 w1 ); +void F3D_RDPHalf_1( u32 w0, u32 w1 ); +void F3D_RDPHalf_2( u32 w0, u32 w1 ); +void F3D_RDPHalf_Cont( u32 w0, u32 w1 ); +void F3D_Tri4( u32 w0, u32 w1 ); +void F3D_Init(); +#endif + diff --git a/source/gles2n64/src/F3DCBFD.cpp b/source/gles2n64/src/F3DCBFD.cpp new file mode 100644 index 0000000..d96449b --- /dev/null +++ b/source/gles2n64/src/F3DCBFD.cpp @@ -0,0 +1,212 @@ +#include "Common.h" +#include "gles2N64.h" +#include "Debug.h" +#include "F3D.h" +#include "F3DEX.h" +#include "F3DEX2.h" 
+#include "F3DCBFD.h" +#include "S2DEX.h" +#include "S2DEX2.h" +#include "N64.h" +#include "RSP.h" +#include "RDP.h" +#include "gSP.h" +#include "gDP.h" +#include "GBI.h" +#include "OpenGL.h" +#include "Config.h" + +//BASED ON GLIDE64 Implementation + +u32 normal_address = 0; + +void F3DCBFD_Vtx(u32 w0, u32 w1) +{ + + s32 v0, n; + u32 address; + n = (w0 >> 12)&0xFF; + v0 = ((w0 >> 1)&0x7F) - n; + address = RSP_SegmentToPhysical(w1); + + if (v0 < 0) + { + return; + } + + gSPFlushTriangles(); + + Vertex* vertex = (Vertex*)&RDRAM[address]; + + for (s32 i=0; i < n; i++) + { + u32 v; +#ifdef __TRIBUFFER_OPT + v = __indexmap_getnew(i, 1); +#else + v = i; +#endif + + OGL.triangles.vertices[v].x = vertex->x; + OGL.triangles.vertices[v].y = vertex->y; + OGL.triangles.vertices[v].z = vertex->z; + OGL.triangles.vertices[v].w = 1.0f; + + OGL.triangles.vertices[v].s = _FIXED2FLOAT(vertex->s, 5); + OGL.triangles.vertices[v].t = _FIXED2FLOAT(vertex->t, 5); + + if (config.enableLighting && gSP.geometryMode & G_LIGHTING) + { + OGL.triangles.vertices[v].nx = ((s8*)RDRAM)[(normal_address + (i<<2) + (v0<<1) + 0)^3]; + OGL.triangles.vertices[v].ny = ((s8*)RDRAM)[(normal_address + (i<<2) + (v0<<1) + 1)^3]; + OGL.triangles.vertices[v].nz = (s8)(vertex->flag&0xff); + } + + gSPProcessVertex(v); + + if (config.enableLighting && gSP.geometryMode & G_LIGHTING) + { + OGL.triangles.vertices[v].r = OGL.triangles.vertices[v].r * vertex->color.r * 0.0039215689f; + OGL.triangles.vertices[v].g = OGL.triangles.vertices[v].g * vertex->color.g * 0.0039215689f; + OGL.triangles.vertices[v].b = OGL.triangles.vertices[v].b * vertex->color.b * 0.0039215689f; + OGL.triangles.vertices[v].a = OGL.triangles.vertices[v].a * vertex->color.a * 0.0039215689f; + } + else + { + OGL.triangles.vertices[v].r = vertex->color.r * 0.0039215689f; + OGL.triangles.vertices[v].g = vertex->color.g * 0.0039215689f; + OGL.triangles.vertices[v].b = vertex->color.b * 0.0039215689f; + OGL.triangles.vertices[v].a = vertex->color.a * 
0.0039215689f; + } + vertex++; + } +} + +void F3DCBFD_MoveWord(u32 w0, u32 w1) +{ + u8 index = (u8)((w0 >> 16) & 0xFF); + u16 offset = (u16)(w0 & 0xFFFF); + + switch (index) + { + case G_MW_NUMLIGHT: + gSPNumLights(w1 / 48); + break; + + case G_MW_CLIP: + if (offset == 0x04) + { + gSPClipRatio( w1 ); + } + break; + + case G_MW_SEGMENT: + gSPSegment(_SHIFTR(offset, 2, 4), w1); + break; + + case G_MW_FOG: + gSPFogFactor( (s16)_SHIFTR( w1, 16, 16 ), (s16)_SHIFTR( w1, 0, 16 ) ); + break; + + case G_MV_COORDMOD: // moveword coord mod + break; + + default: + break; + } +} + +#define F3DCBFD_MV_VIEWPORT 8 +#define F3DCBFD_MV_LIGHT 10 +#define F3DCBFD_MV_NORMAL 14 + +void F3DCBFD_MoveMem(u32 w0, u32 w1) +{ +#ifdef __TRIBUFFER_OPT + gSPFlushTriangles(); +#endif + switch (_SHIFTR( w0, 0, 8 )) + { + case F3DCBFD_MV_VIEWPORT: + gSPViewport(w1); + break; + + case F3DCBFD_MV_LIGHT: + { + u32 offset = _SHIFTR( w0, 8, 8 ) << 3; + if (offset >= 48) + { + gSPLight( w1, (offset - 24) / 24); + } + break; + } + + case F3DCBFD_MV_NORMAL: + normal_address = RSP_SegmentToPhysical(w1); + break; + + } +} + +void F3DCBFD_Tri4(u32 w0, u32 w1) /* unpacks twelve 5-bit vertex indices (four triangles): w0 bits 27..0 hold the first five indices plus the top 3 bits of the third one, w1 holds its low 2 bits and the remaining six; each bit field is consumed exactly once */ +{ + gSP4Triangles( _SHIFTR(w0, 23, 5), _SHIFTR(w0, 18, 5), (_SHIFTR(w0, 15, 3 ) << 2) | _SHIFTR(w1, 30, 2), + _SHIFTR(w0, 10, 5), _SHIFTR(w0, 5, 5), _SHIFTR(w0, 0, 5), + _SHIFTR(w1, 25, 5), _SHIFTR(w1, 20, 5), _SHIFTR(w1, 15, 5), + _SHIFTR(w1, 10, 5), _SHIFTR(w1, 5, 5), _SHIFTR(w1, 0, 5)); +} + + +void F3DCBFD_Init() +{ + LOG(LOG_VERBOSE, "USING CBFD ucode!\n"); + + // Set GeometryMode flags + GBI_InitFlags(F3DEX2); + + GBI.PCStackSize = 10; + + // GBI Command Command Value Command Function + GBI_SetGBI( G_RDPHALF_2, F3DEX2_RDPHALF_2, F3D_RDPHalf_2 ); + GBI_SetGBI( G_SETOTHERMODE_H, F3DEX2_SETOTHERMODE_H, F3DEX2_SetOtherMode_H ); + GBI_SetGBI( G_SETOTHERMODE_L, F3DEX2_SETOTHERMODE_L, F3DEX2_SetOtherMode_L ); + GBI_SetGBI( G_RDPHALF_1, F3DEX2_RDPHALF_1, F3D_RDPHalf_1 ); + GBI_SetGBI( G_SPNOOP, F3DEX2_SPNOOP, F3D_SPNoOp ); + GBI_SetGBI( G_ENDDL, 
F3DEX2_ENDDL, F3D_EndDL ); + GBI_SetGBI( G_DL, F3DEX2_DL, F3D_DList ); + GBI_SetGBI( G_LOAD_UCODE, F3DEX2_LOAD_UCODE, F3DEX_Load_uCode ); + GBI_SetGBI( G_MOVEMEM, F3DEX2_MOVEMEM, F3DCBFD_MoveMem); + GBI_SetGBI( G_MOVEWORD, F3DEX2_MOVEWORD, F3DCBFD_MoveWord); + GBI_SetGBI( G_MTX, F3DEX2_MTX, F3DEX2_Mtx ); + GBI_SetGBI( G_GEOMETRYMODE, F3DEX2_GEOMETRYMODE, F3DEX2_GeometryMode ); + GBI_SetGBI( G_POPMTX, F3DEX2_POPMTX, F3DEX2_PopMtx ); + GBI_SetGBI( G_TEXTURE, F3DEX2_TEXTURE, F3DEX2_Texture ); + GBI_SetGBI( G_DMA_IO, F3DEX2_DMA_IO, F3DEX2_DMAIO ); + GBI_SetGBI( G_SPECIAL_1, F3DEX2_SPECIAL_1, F3DEX2_Special_1 ); + GBI_SetGBI( G_SPECIAL_2, F3DEX2_SPECIAL_2, F3DEX2_Special_2 ); + GBI_SetGBI( G_SPECIAL_3, F3DEX2_SPECIAL_3, F3DEX2_Special_3 ); + + + + GBI_SetGBI(G_VTX, F3DEX2_VTX, F3DCBFD_Vtx); + GBI_SetGBI(G_MODIFYVTX, F3DEX2_MODIFYVTX, F3DEX_ModifyVtx); + GBI_SetGBI(G_CULLDL, F3DEX2_CULLDL, F3DEX_CullDL); + GBI_SetGBI(G_BRANCH_Z, F3DEX2_BRANCH_Z, F3DEX_Branch_Z); + GBI_SetGBI(G_TRI1, F3DEX2_TRI1, F3DEX2_Tri1); + GBI_SetGBI(G_TRI2, F3DEX2_TRI2, F3DEX_Tri2); + GBI_SetGBI(G_QUAD, F3DEX2_QUAD, F3DEX2_Quad); +// GBI_SetGBI( G_LINE3D, F3DEX2_LINE3D, F3DEX2_Line3D ); + + //for some reason glide64 maps TRI4 to these locations: + + for(int i = 0x10; i <= 0x1F; i++) + { + GBI_SetGBI(G_TRI4, i, F3DCBFD_Tri4); + } + + GBI_SetGBI( G_BG_1CYC, S2DEX2_BG_1CYC, S2DEX_BG_1Cyc); + GBI_SetGBI( G_BG_COPY, S2DEX2_BG_COPY, S2DEX_BG_Copy); + GBI_SetGBI( G_OBJ_RENDERMODE, S2DEX2_OBJ_RENDERMODE, S2DEX_Obj_RenderMode); + +} + diff --git a/source/gles2n64/src/F3DCBFD.h b/source/gles2n64/src/F3DCBFD.h new file mode 100644 index 0000000..8984135 --- /dev/null +++ b/source/gles2n64/src/F3DCBFD.h @@ -0,0 +1,7 @@ +#ifndef F3DCBFD_H +#define F3DCBFD_H + + +void F3DCBFD_Init(); +#endif + diff --git a/source/gles2n64/src/F3DDKR.cpp b/source/gles2n64/src/F3DDKR.cpp new file mode 100644 index 0000000..6d4afe5 --- /dev/null +++ b/source/gles2n64/src/F3DDKR.cpp @@ -0,0 +1,124 @@ +#include "gles2N64.h" +#include 
"Debug.h" +#include "F3D.h" +#include "F3DDKR.h" +#include "N64.h" +#include "RSP.h" +#include "RDP.h" +#include "gSP.h" +#include "gDP.h" +#include "GBI.h" +#include "OpenGL.h" + + +void F3DDKR_DMA_Mtx( u32 w0, u32 w1 ) +{ + if (_SHIFTR( w0, 0, 16 ) != 64) + { +// GBI_DetectUCode(); // Something's wrong +#ifdef DEBUG + DebugMsg( DEBUG_MEDIUM | DEBUG_HIGH | DEBUG_ERROR, "G_MTX: address = 0x%08X length = %i params = 0x%02X\n", w1, _SHIFTR( w0, 0, 16 ), _SHIFTR( w0, 16, 8 ) ); +#endif + return; + } + + u32 index = _SHIFTR( w0, 16, 4 ); + u32 multiply; + + if (index == 0) // DKR + { + index = _SHIFTR( w0, 22, 2 ); + multiply = 0; + } + else // Gemini + { + multiply = _SHIFTR( w0, 23, 1 ); + } + + gSPDMAMatrix( w1, index, multiply ); +} + +void F3DDKR_DMA_Vtx( u32 w0, u32 w1 ) +{ + if ((w0 & F3DDKR_VTX_APPEND)) + { + if (gSP.matrix.billboard) + gSP.vertexi = 1; + } + else + gSP.vertexi = 0; + + u32 n = _SHIFTR( w0, 19, 5 ) + 1; + + gSPDMAVertex( w1, n, gSP.vertexi + _SHIFTR( w0, 9, 5 ) ); + + gSP.vertexi += n; +} + +void F3DDKR_DMA_Tri( u32 w0, u32 w1 ) +{ + gSPDMATriangles( w1, _SHIFTR( w0, 4, 12 ) ); + gSP.vertexi = 0; +} + +void F3DDKR_DMA_DList( u32 w0, u32 w1 ) +{ + gSPDMADisplayList( w1, _SHIFTR( w0, 16, 8 ) ); +} + +void F3DDKR_DMA_Offsets( u32 w0, u32 w1 ) +{ + gSPSetDMAOffsets( _SHIFTR( w0, 0, 24 ), _SHIFTR( w1, 0, 24 ) ); +} + +void F3DDKR_MoveWord( u32 w0, u32 w1 ) +{ + switch (_SHIFTR( w0, 0, 8 )) + { + case 0x02: + gSP.matrix.billboard = w1 & 1; + break; + case 0x0A: + gSP.matrix.modelViewi = _SHIFTR( w1, 6, 2 ); + gSP.changed |= CHANGED_MATRIX; + break; + default: + F3D_MoveWord( w0, w1 ); + break; + } +} + +void F3DDKR_Init() +{ + // Set GeometryMode flags + GBI_InitFlags( F3D ); + + GBI.PCStackSize = 10; + + // GBI Command Command Value Command Function + GBI_SetGBI( G_SPNOOP, F3D_SPNOOP, F3D_SPNoOp ); + GBI_SetGBI( G_DMA_MTX, F3DDKR_DMA_MTX, F3DDKR_DMA_Mtx ); + GBI_SetGBI( G_MOVEMEM, F3D_MOVEMEM, F3D_MoveMem ); + GBI_SetGBI( G_DMA_VTX, F3DDKR_DMA_VTX, 
F3DDKR_DMA_Vtx ); + GBI_SetGBI( G_DL, F3D_DL, F3D_DList ); + GBI_SetGBI( G_DMA_DL, F3DDKR_DMA_DL, F3DDKR_DMA_DList ); + GBI_SetGBI( G_DMA_TRI, F3DDKR_DMA_TRI, F3DDKR_DMA_Tri ); + + GBI_SetGBI( G_DMA_OFFSETS, F3DDKR_DMA_OFFSETS, F3DDKR_DMA_Offsets ); + GBI_SetGBI( G_CULLDL, F3D_CULLDL, F3D_CullDL ); + GBI_SetGBI( G_MOVEWORD, F3D_MOVEWORD, F3DDKR_MoveWord ); + GBI_SetGBI( G_TEXTURE, F3D_TEXTURE, F3D_Texture ); + GBI_SetGBI( G_SETOTHERMODE_H, F3D_SETOTHERMODE_H, F3D_SetOtherMode_H ); + GBI_SetGBI( G_SETOTHERMODE_L, F3D_SETOTHERMODE_L, F3D_SetOtherMode_L ); + GBI_SetGBI( G_ENDDL, F3D_ENDDL, F3D_EndDL ); + GBI_SetGBI( G_SETGEOMETRYMODE, F3D_SETGEOMETRYMODE, F3D_SetGeometryMode ); + GBI_SetGBI( G_CLEARGEOMETRYMODE, F3D_CLEARGEOMETRYMODE, F3D_ClearGeometryMode ); + GBI_SetGBI( G_QUAD, F3D_QUAD, F3D_Quad ); + GBI_SetGBI( G_RDPHALF_1, F3D_RDPHALF_1, F3D_RDPHalf_1 ); + GBI_SetGBI( G_RDPHALF_2, F3D_RDPHALF_2, F3D_RDPHalf_2 ); + GBI_SetGBI( G_RDPHALF_CONT, F3D_RDPHALF_CONT, F3D_RDPHalf_Cont ); + GBI_SetGBI( G_TRI4, F3D_TRI4, F3D_Tri4 ); + + gSPSetDMAOffsets( 0, 0 ); +} + diff --git a/source/gles2n64/src/F3DDKR.h b/source/gles2n64/src/F3DDKR.h new file mode 100644 index 0000000..d70ea46 --- /dev/null +++ b/source/gles2n64/src/F3DDKR.h @@ -0,0 +1,14 @@ +#ifndef F3DDKR_H +#define F3DDKR_H + +#define F3DDKR_VTX_APPEND 0x00010000 + +#define F3DDKR_DMA_MTX 0x01 +#define F3DDKR_DMA_VTX 0x04 +#define F3DDKR_DMA_TRI 0x05 +#define F3DDKR_DMA_DL 0x07 +#define F3DDKR_DMA_OFFSETS 0xBF + +void F3DDKR_Init(); +#endif + diff --git a/source/gles2n64/src/F3DEX.cpp b/source/gles2n64/src/F3DEX.cpp new file mode 100644 index 0000000..53fcd63 --- /dev/null +++ b/source/gles2n64/src/F3DEX.cpp @@ -0,0 +1,90 @@ +#include "gles2N64.h" +#include "Debug.h" +#include "F3D.h" +#include "F3DEX.h" +#include "N64.h" +#include "RSP.h" +#include "RDP.h" +#include "gSP.h" +#include "gDP.h" +#include "GBI.h" + +void F3DEX_Vtx( u32 w0, u32 w1 ) +{ + gSPVertex( w1, _SHIFTR( w0, 10, 6 ), _SHIFTR( w0, 17, 7 ) ); +} + 
+void F3DEX_Tri1( u32 w0, u32 w1 ) +{ + gSP1Triangle( _SHIFTR( w1, 17, 7 ), _SHIFTR( w1, 9, 7 ), _SHIFTR( w1, 1, 7 )); +} + +void F3DEX_CullDL( u32 w0, u32 w1 ) +{ + gSPCullDisplayList( _SHIFTR( w0, 1, 15 ), _SHIFTR( w1, 1, 15 ) ); +} + +void F3DEX_ModifyVtx( u32 w0, u32 w1 ) +{ + gSPModifyVertex( _SHIFTR( w0, 1, 15 ), _SHIFTR( w0, 16, 8 ), w1 ); +} + +void F3DEX_Tri2( u32 w0, u32 w1 ) +{ + gSP2Triangles( _SHIFTR( w0, 17, 7 ), _SHIFTR( w0, 9, 7 ), _SHIFTR( w0, 1, 7 ), 0, + _SHIFTR( w1, 17, 7 ), _SHIFTR( w1, 9, 7 ), _SHIFTR( w1, 1, 7 ), 0); +} + +void F3DEX_Quad( u32 w0, u32 w1 ) +{ + gSP1Quadrangle( _SHIFTR( w1, 25, 7 ), _SHIFTR( w1, 17, 7 ), _SHIFTR( w1, 9, 7 ), _SHIFTR( w1, 1, 7 ) ); +} + +void F3DEX_Branch_Z( u32 w0, u32 w1 ) +{ + gSPBranchLessZ( gDP.half_1, _SHIFTR( w0, 1, 11 ), (s32)w1 ); +} + +void F3DEX_Load_uCode( u32 w0, u32 w1 ) +{ + gSPLoadUcodeEx( w1, gDP.half_1, _SHIFTR( w0, 0, 16 ) + 1 ); +} + +void F3DEX_Init() +{ + // Set GeometryMode flags + GBI_InitFlags( F3DEX ); + + GBI.PCStackSize = 18; + + // GBI Command Command Value Command Function + GBI_SetGBI( G_SPNOOP, F3D_SPNOOP, F3D_SPNoOp ); + GBI_SetGBI( G_MTX, F3D_MTX, F3D_Mtx ); + GBI_SetGBI( G_RESERVED0, F3D_RESERVED0, F3D_Reserved0 ); + GBI_SetGBI( G_MOVEMEM, F3D_MOVEMEM, F3D_MoveMem ); + GBI_SetGBI( G_VTX, F3D_VTX, F3DEX_Vtx ); + GBI_SetGBI( G_RESERVED1, F3D_RESERVED1, F3D_Reserved1 ); + GBI_SetGBI( G_DL, F3D_DL, F3D_DList ); + GBI_SetGBI( G_RESERVED2, F3D_RESERVED2, F3D_Reserved2 ); + GBI_SetGBI( G_RESERVED3, F3D_RESERVED3, F3D_Reserved3 ); + GBI_SetGBI( G_SPRITE2D_BASE, F3D_SPRITE2D_BASE, F3D_Sprite2D_Base ); + + GBI_SetGBI( G_TRI1, F3D_TRI1, F3DEX_Tri1 ); + GBI_SetGBI( G_CULLDL, F3D_CULLDL, F3DEX_CullDL ); + GBI_SetGBI( G_POPMTX, F3D_POPMTX, F3D_PopMtx ); + GBI_SetGBI( G_MOVEWORD, F3D_MOVEWORD, F3D_MoveWord ); + GBI_SetGBI( G_TEXTURE, F3D_TEXTURE, F3D_Texture ); + GBI_SetGBI( G_SETOTHERMODE_H, F3D_SETOTHERMODE_H, F3D_SetOtherMode_H ); + GBI_SetGBI( G_SETOTHERMODE_L, F3D_SETOTHERMODE_L, 
F3D_SetOtherMode_L ); + GBI_SetGBI( G_ENDDL, F3D_ENDDL, F3D_EndDL ); + GBI_SetGBI( G_SETGEOMETRYMODE, F3D_SETGEOMETRYMODE, F3D_SetGeometryMode ); + GBI_SetGBI( G_CLEARGEOMETRYMODE, F3D_CLEARGEOMETRYMODE, F3D_ClearGeometryMode ); + GBI_SetGBI( G_QUAD, F3D_QUAD, F3DEX_Quad ); + GBI_SetGBI( G_RDPHALF_1, F3D_RDPHALF_1, F3D_RDPHalf_1 ); + GBI_SetGBI( G_RDPHALF_2, F3D_RDPHALF_2, F3D_RDPHalf_2 ); + GBI_SetGBI( G_MODIFYVTX, F3DEX_MODIFYVTX, F3DEX_ModifyVtx ); + GBI_SetGBI( G_TRI2, F3DEX_TRI2, F3DEX_Tri2 ); + GBI_SetGBI( G_BRANCH_Z, F3DEX_BRANCH_Z, F3DEX_Branch_Z ); + GBI_SetGBI( G_LOAD_UCODE, F3DEX_LOAD_UCODE, F3DEX_Load_uCode ); +} + diff --git a/source/gles2n64/src/F3DEX.h b/source/gles2n64/src/F3DEX.h new file mode 100644 index 0000000..44d38f0 --- /dev/null +++ b/source/gles2n64/src/F3DEX.h @@ -0,0 +1,54 @@ +#ifndef F3DEX_H +#define F3DEX_H + +#define F3DEX_MTX_STACKSIZE 18 + +#define F3DEX_MTX_MODELVIEW 0x00 +#define F3DEX_MTX_PROJECTION 0x01 +#define F3DEX_MTX_MUL 0x00 +#define F3DEX_MTX_LOAD 0x02 +#define F3DEX_MTX_NOPUSH 0x00 +#define F3DEX_MTX_PUSH 0x04 + +#define F3DEX_TEXTURE_ENABLE 0x00000002 +#define F3DEX_SHADING_SMOOTH 0x00000200 +#define F3DEX_CULL_FRONT 0x00001000 +#define F3DEX_CULL_BACK 0x00002000 +#define F3DEX_CULL_BOTH 0x00003000 +#define F3DEX_CLIPPING 0x00800000 + +#define F3DEX_MV_VIEWPORT 0x80 + +#define F3DEX_MWO_aLIGHT_1 0x00 +#define F3DEX_MWO_bLIGHT_1 0x04 +#define F3DEX_MWO_aLIGHT_2 0x20 +#define F3DEX_MWO_bLIGHT_2 0x24 +#define F3DEX_MWO_aLIGHT_3 0x40 +#define F3DEX_MWO_bLIGHT_3 0x44 +#define F3DEX_MWO_aLIGHT_4 0x60 +#define F3DEX_MWO_bLIGHT_4 0x64 +#define F3DEX_MWO_aLIGHT_5 0x80 +#define F3DEX_MWO_bLIGHT_5 0x84 +#define F3DEX_MWO_aLIGHT_6 0xa0 +#define F3DEX_MWO_bLIGHT_6 0xa4 +#define F3DEX_MWO_aLIGHT_7 0xc0 +#define F3DEX_MWO_bLIGHT_7 0xc4 +#define F3DEX_MWO_aLIGHT_8 0xe0 +#define F3DEX_MWO_bLIGHT_8 0xe4 + +// F3DEX commands +#define F3DEX_MODIFYVTX 0xB2 +#define F3DEX_TRI2 0xB1 +#define F3DEX_BRANCH_Z 0xB0 +#define F3DEX_LOAD_UCODE 0xAF 
// 0xCF + +void F3DEX_Vtx( u32 w0, u32 w1 ); +void F3DEX_Tri1( u32 w0, u32 w1 ); +void F3DEX_CullDL( u32 w0, u32 w1 ); +void F3DEX_ModifyVtx( u32 w0, u32 w1 ); +void F3DEX_Tri2( u32 w0, u32 w1 ); +void F3DEX_Branch_Z( u32 w0, u32 w1 ); +void F3DEX_Load_uCode( u32 w0, u32 w1 ); +void F3DEX_Init(); +#endif + diff --git a/source/gles2n64/src/F3DEX2.cpp b/source/gles2n64/src/F3DEX2.cpp new file mode 100644 index 0000000..0d9f3c9 --- /dev/null +++ b/source/gles2n64/src/F3DEX2.cpp @@ -0,0 +1,255 @@ +#include "gles2N64.h" +#include "Debug.h" +#include "F3D.h" +#include "F3DEX.h" +#include "F3DEX2.h" +#include "N64.h" +#include "RSP.h" +#include "RDP.h" +#include "gSP.h" +#include "gDP.h" +#include "GBI.h" +#include "OpenGL.h" + +#include "Config.h" + +void F3DEX2_Mtx( u32 w0, u32 w1 ) +{ + gSPMatrix( w1, _SHIFTR( w0, 0, 8 ) ^ G_MTX_PUSH ); +} + +void F3DEX2_MoveMem( u32 w0, u32 w1 ) +{ +#ifdef __TRIBUFFER_OPT + gSPFlushTriangles(); +#endif + switch (_SHIFTR( w0, 0, 8 )) + { + case F3DEX2_MV_VIEWPORT: + gSPViewport( w1 ); + break; + + case G_MV_MATRIX: + gSPForceMatrix( w1 ); + RSP.PC[RSP.PCi] += 8; // force matrix takes two commands + break; + + case G_MV_LIGHT: + u32 offset = _SHIFTR( w0, 8, 8 ) << 3; + if (offset >= 48) + { + gSPLight( w1, (offset - 24) / 24); + } + break; + } +} + +void F3DEX2_Vtx( u32 w0, u32 w1 ) +{ + u32 n = _SHIFTR( w0, 12, 8 ); + + gSPVertex( w1, n, _SHIFTR( w0, 1, 7 ) - n ); +} + +void F3DEX2_Reserved1( u32 w0, u32 w1 ) +{ +} + +void F3DEX2_Tri1( u32 w0, u32 w1 ) +{ + gSP1Triangle( _SHIFTR( w0, 17, 7 ), + _SHIFTR( w0, 9, 7 ), + _SHIFTR( w0, 1, 7 )); +} + +void F3DEX2_PopMtx( u32 w0, u32 w1 ) +{ + gSPPopMatrixN( 0, w1 >> 6 ); +} + +void F3DEX2_MoveWord( u32 w0, u32 w1 ) +{ + switch (_SHIFTR( w0, 16, 8 )) + { + case G_MW_FORCEMTX: + // Handled in movemem + break; + case G_MW_MATRIX: + gSPInsertMatrix( _SHIFTR( w0, 0, 16 ), w1 ); + break; + case G_MW_NUMLIGHT: + gSPNumLights( w1 / 24 ); + break; + case G_MW_CLIP: + gSPClipRatio( w1 ); + break; + 
case G_MW_SEGMENT: + gSPSegment( _SHIFTR( w0, 0, 16 ) >> 2, w1 & 0x00FFFFFF ); + break; + case G_MW_FOG: + gSPFogFactor( (s16)_SHIFTR( w1, 16, 16 ), (s16)_SHIFTR( w1, 0, 16 ) ); + break; + case G_MW_LIGHTCOL: + gSPLightColor((_SHIFTR( w0, 0, 16 ) / 24) + 1, w1 ); + break; + case G_MW_PERSPNORM: + gSPPerspNormalize( w1 ); + break; + } +} + +void F3DEX2_Texture( u32 w0, u32 w1 ) +{ + gSPTexture( _FIXED2FLOAT( _SHIFTR( w1, 16, 16 ), 16 ), + _FIXED2FLOAT( _SHIFTR( w1, 0, 16 ), 16 ), + _SHIFTR( w0, 11, 3 ), + _SHIFTR( w0, 8, 3 ), + _SHIFTR( w0, 1, 7 ) ); +} + +void F3DEX2_SetOtherMode_H( u32 w0, u32 w1 ) +{ + switch (32 - _SHIFTR( w0, 8, 8 ) - (_SHIFTR( w0, 0, 8 ) + 1)) + { + case G_MDSFT_PIPELINE: + gDPPipelineMode( w1 >> G_MDSFT_PIPELINE ); + break; + case G_MDSFT_CYCLETYPE: + gDPSetCycleType( w1 >> G_MDSFT_CYCLETYPE ); + break; + case G_MDSFT_TEXTPERSP: + gDPSetTexturePersp( w1 >> G_MDSFT_TEXTPERSP ); + break; + case G_MDSFT_TEXTDETAIL: + gDPSetTextureDetail( w1 >> G_MDSFT_TEXTDETAIL ); + break; + case G_MDSFT_TEXTLOD: + gDPSetTextureLOD( w1 >> G_MDSFT_TEXTLOD ); + break; + case G_MDSFT_TEXTLUT: + gDPSetTextureLUT( w1 >> G_MDSFT_TEXTLUT ); + break; + case G_MDSFT_TEXTFILT: + gDPSetTextureFilter( w1 >> G_MDSFT_TEXTFILT ); + break; + case G_MDSFT_TEXTCONV: + gDPSetTextureConvert( w1 >> G_MDSFT_TEXTCONV ); + break; + case G_MDSFT_COMBKEY: + gDPSetCombineKey( w1 >> G_MDSFT_COMBKEY ); + break; + case G_MDSFT_RGBDITHER: + gDPSetColorDither( w1 >> G_MDSFT_RGBDITHER ); + break; + case G_MDSFT_ALPHADITHER: + gDPSetAlphaDither( w1 >> G_MDSFT_ALPHADITHER ); + break; + default: + u32 length = _SHIFTR( w0, 0, 8 ) + 1; + u32 shift = 32 - _SHIFTR( w0, 8, 8 ) - length; + u32 mask = ((1 << length) - 1) << shift; + + gDP.otherMode.h &= ~mask; + gDP.otherMode.h |= w1 & mask; + + gDP.changed |= CHANGED_CYCLETYPE; + break; + } +} + +void F3DEX2_SetOtherMode_L( u32 w0, u32 w1 ) +{ + switch (32 - _SHIFTR( w0, 8, 8 ) - (_SHIFTR( w0, 0, 8 ) + 1)) + { + case G_MDSFT_ALPHACOMPARE: + 
gDPSetAlphaCompare( w1 >> G_MDSFT_ALPHACOMPARE ); + break; + case G_MDSFT_ZSRCSEL: + gDPSetDepthSource( w1 >> G_MDSFT_ZSRCSEL ); + break; + case G_MDSFT_RENDERMODE: + gDPSetRenderMode( w1 & 0xCCCCFFFF, w1 & 0x3333FFFF ); + break; + default: + u32 length = _SHIFTR( w0, 0, 8 ) + 1; + u32 shift = 32 - _SHIFTR( w0, 8, 8 ) - length; + u32 mask = ((1 << length) - 1) << shift; + + gDP.otherMode.l &= ~mask; + gDP.otherMode.l |= w1 & mask; + + gDP.changed |= CHANGED_RENDERMODE | CHANGED_ALPHACOMPARE; + break; + } +} + +void F3DEX2_GeometryMode( u32 w0, u32 w1 ) +{ + gSPGeometryMode( ~_SHIFTR( w0, 0, 24 ), w1 ); +} + +void F3DEX2_DMAIO( u32 w0, u32 w1 ) +{ +} + +void F3DEX2_Special_1( u32 w0, u32 w1 ) +{ +} + +void F3DEX2_Special_2( u32 w0, u32 w1 ) +{ +} + +void F3DEX2_Special_3( u32 w0, u32 w1 ) +{ +} + +void F3DEX2_Quad( u32 w0, u32 w1 ) +{ + gSP2Triangles( _SHIFTR( w0, 17, 7 ), + _SHIFTR( w0, 9, 7 ), + _SHIFTR( w0, 1, 7 ), + 0, + _SHIFTR( w1, 17, 7 ), + _SHIFTR( w1, 9, 7 ), + _SHIFTR( w1, 1, 7 ), + 0 ); +} + +void F3DEX2_Init() +{ + // Set GeometryMode flags + GBI_InitFlags( F3DEX2 ); + + GBI.PCStackSize = 18; + + // GBI Command Command Value Command Function + GBI_SetGBI( G_RDPHALF_2, F3DEX2_RDPHALF_2, F3D_RDPHalf_2 ); + GBI_SetGBI( G_SETOTHERMODE_H, F3DEX2_SETOTHERMODE_H, F3DEX2_SetOtherMode_H ); + GBI_SetGBI( G_SETOTHERMODE_L, F3DEX2_SETOTHERMODE_L, F3DEX2_SetOtherMode_L ); + GBI_SetGBI( G_RDPHALF_1, F3DEX2_RDPHALF_1, F3D_RDPHalf_1 ); + GBI_SetGBI( G_SPNOOP, F3DEX2_SPNOOP, F3D_SPNoOp ); + GBI_SetGBI( G_ENDDL, F3DEX2_ENDDL, F3D_EndDL ); + GBI_SetGBI( G_DL, F3DEX2_DL, F3D_DList ); + GBI_SetGBI( G_LOAD_UCODE, F3DEX2_LOAD_UCODE, F3DEX_Load_uCode ); + GBI_SetGBI( G_MOVEMEM, F3DEX2_MOVEMEM, F3DEX2_MoveMem ); + GBI_SetGBI( G_MOVEWORD, F3DEX2_MOVEWORD, F3DEX2_MoveWord ); + GBI_SetGBI( G_MTX, F3DEX2_MTX, F3DEX2_Mtx ); + GBI_SetGBI( G_GEOMETRYMODE, F3DEX2_GEOMETRYMODE, F3DEX2_GeometryMode ); + GBI_SetGBI( G_POPMTX, F3DEX2_POPMTX, F3DEX2_PopMtx ); + GBI_SetGBI( G_TEXTURE, 
F3DEX2_TEXTURE, F3DEX2_Texture ); + GBI_SetGBI( G_DMA_IO, F3DEX2_DMA_IO, F3DEX2_DMAIO ); + GBI_SetGBI( G_SPECIAL_1, F3DEX2_SPECIAL_1, F3DEX2_Special_1 ); + GBI_SetGBI( G_SPECIAL_2, F3DEX2_SPECIAL_2, F3DEX2_Special_2 ); + GBI_SetGBI( G_SPECIAL_3, F3DEX2_SPECIAL_3, F3DEX2_Special_3 ); + + GBI_SetGBI( G_VTX, F3DEX2_VTX, F3DEX2_Vtx ); + GBI_SetGBI( G_MODIFYVTX, F3DEX2_MODIFYVTX, F3DEX_ModifyVtx ); + GBI_SetGBI( G_CULLDL, F3DEX2_CULLDL, F3DEX_CullDL ); + GBI_SetGBI( G_BRANCH_Z, F3DEX2_BRANCH_Z, F3DEX_Branch_Z ); + GBI_SetGBI( G_TRI1, F3DEX2_TRI1, F3DEX2_Tri1 ); + GBI_SetGBI( G_TRI2, F3DEX2_TRI2, F3DEX_Tri2 ); + GBI_SetGBI( G_QUAD, F3DEX2_QUAD, F3DEX2_Quad ); +// GBI_SetGBI( G_LINE3D, F3DEX2_LINE3D, F3DEX2_Line3D ); +} + diff --git a/source/gles2n64/src/F3DEX2.h b/source/gles2n64/src/F3DEX2.h new file mode 100644 index 0000000..b598e12 --- /dev/null +++ b/source/gles2n64/src/F3DEX2.h @@ -0,0 +1,88 @@ +#ifndef F3DEX2_H +#define F3DEX2_H + +#define F3DEX2_MTX_STACKSIZE 18 + +#define F3DEX2_MTX_MODELVIEW 0x00 +#define F3DEX2_MTX_PROJECTION 0x04 +#define F3DEX2_MTX_MUL 0x00 +#define F3DEX2_MTX_LOAD 0x02 +#define F3DEX2_MTX_NOPUSH 0x00 +#define F3DEX2_MTX_PUSH 0x01 + +#define F3DEX2_TEXTURE_ENABLE 0x00000000 +#define F3DEX2_SHADING_SMOOTH 0x00200000 +#define F3DEX2_CULL_FRONT 0x00000200 +#define F3DEX2_CULL_BACK 0x00000400 +#define F3DEX2_CULL_BOTH 0x00000600 +#define F3DEX2_CLIPPING 0x00800000 + +#define F3DEX2_MV_VIEWPORT 8 + +#define F3DEX2_MWO_aLIGHT_1 0x00 +#define F3DEX2_MWO_bLIGHT_1 0x04 +#define F3DEX2_MWO_aLIGHT_2 0x18 +#define F3DEX2_MWO_bLIGHT_2 0x1c +#define F3DEX2_MWO_aLIGHT_3 0x30 +#define F3DEX2_MWO_bLIGHT_3 0x34 +#define F3DEX2_MWO_aLIGHT_4 0x48 +#define F3DEX2_MWO_bLIGHT_4 0x4c +#define F3DEX2_MWO_aLIGHT_5 0x60 +#define F3DEX2_MWO_bLIGHT_5 0x64 +#define F3DEX2_MWO_aLIGHT_6 0x78 +#define F3DEX2_MWO_bLIGHT_6 0x7c +#define F3DEX2_MWO_aLIGHT_7 0x90 +#define F3DEX2_MWO_bLIGHT_7 0x94 +#define F3DEX2_MWO_aLIGHT_8 0xa8 +#define F3DEX2_MWO_bLIGHT_8 0xac + + +#define 
F3DEX2_RDPHALF_2 0xF1 +#define F3DEX2_SETOTHERMODE_H 0xE3 +#define F3DEX2_SETOTHERMODE_L 0xE2 +#define F3DEX2_RDPHALF_1 0xE1 +#define F3DEX2_SPNOOP 0xE0 +#define F3DEX2_ENDDL 0xDF +#define F3DEX2_DL 0xDE +#define F3DEX2_LOAD_UCODE 0xDD +#define F3DEX2_MOVEMEM 0xDC +#define F3DEX2_MOVEWORD 0xDB +#define F3DEX2_MTX 0xDA +#define F3DEX2_GEOMETRYMODE 0xD9 +#define F3DEX2_POPMTX 0xD8 +#define F3DEX2_TEXTURE 0xD7 +#define F3DEX2_DMA_IO 0xD6 +#define F3DEX2_SPECIAL_1 0xD5 +#define F3DEX2_SPECIAL_2 0xD4 +#define F3DEX2_SPECIAL_3 0xD3 + +#define F3DEX2_VTX 0x01 +#define F3DEX2_MODIFYVTX 0x02 +#define F3DEX2_CULLDL 0x03 +#define F3DEX2_BRANCH_Z 0x04 +#define F3DEX2_TRI1 0x05 +#define F3DEX2_TRI2 0x06 +#define F3DEX2_QUAD 0x07 +//#define F3DEX2_LINE3D 0x08 + + +void F3DEX2_Mtx( u32 w0, u32 w1 ); +void F3DEX2_MoveMem( u32 w0, u32 w1 ); +void F3DEX2_Vtx( u32 w0, u32 w1 ); +void F3DEX2_Reserved1( u32 w0, u32 w1 ); +void F3DEX2_Tri1( u32 w0, u32 w1 ); +void F3DEX2_PopMtx( u32 w0, u32 w1 ); +void F3DEX2_MoveWord( u32 w0, u32 w1 ); +void F3DEX2_Texture( u32 w0, u32 w1 ); +void F3DEX2_SetOtherMode_H( u32 w0, u32 w1 ); +void F3DEX2_SetOtherMode_L( u32 w0, u32 w1 ); +void F3DEX2_GeometryMode( u32 w0, u32 w1 ); +//void F3DEX2_Line3D( u32 w0, u32 w1 ); +void F3DEX2_DMAIO( u32 w0, u32 w1 ); +void F3DEX2_Special_1( u32 w0, u32 w1 ); +void F3DEX2_Special_2( u32 w0, u32 w1 ); +void F3DEX2_Special_3( u32 w0, u32 w1 ); +void F3DEX2_Quad( u32 w0, u32 w1 ); +void F3DEX2_Init(); +#endif + diff --git a/source/gles2n64/src/F3DPD.cpp b/source/gles2n64/src/F3DPD.cpp new file mode 100644 index 0000000..fe521c4 --- /dev/null +++ b/source/gles2n64/src/F3DPD.cpp @@ -0,0 +1,59 @@ +#include "gles2N64.h" +#include "Debug.h" +#include "F3D.h" +#include "F3DPD.h" +#include "N64.h" +#include "RSP.h" +#include "RDP.h" +#include "gSP.h" +#include "gDP.h" +#include "GBI.h" + +void F3DPD_Vtx( u32 w0, u32 w1 ) +{ + gSPCIVertex( w1, _SHIFTR( w0, 20, 4 ) + 1, _SHIFTR( w0, 16, 4 ) ); +} + +void F3DPD_VtxColorBase( 
u32 w0, u32 w1 ) +{ + gSPSetVertexColorBase( w1 ); +} + +void F3DPD_Init() +{ + // Set GeometryMode flags + GBI_InitFlags( F3D ); + + GBI.PCStackSize = 10; + + // GBI Command Command Value Command Function + GBI_SetGBI( G_SPNOOP, F3D_SPNOOP, F3D_SPNoOp ); + GBI_SetGBI( G_MTX, F3D_MTX, F3D_Mtx ); + GBI_SetGBI( G_RESERVED0, F3D_RESERVED0, F3D_Reserved0 ); + GBI_SetGBI( G_MOVEMEM, F3D_MOVEMEM, F3D_MoveMem ); + GBI_SetGBI( G_VTX, F3D_VTX, F3DPD_Vtx ); + GBI_SetGBI( G_RESERVED1, F3D_RESERVED1, F3D_Reserved1 ); + GBI_SetGBI( G_DL, F3D_DL, F3D_DList ); + GBI_SetGBI( G_VTXCOLORBASE, F3DPD_VTXCOLORBASE, F3DPD_VtxColorBase ); + GBI_SetGBI( G_RESERVED3, F3D_RESERVED3, F3D_Reserved3 ); + GBI_SetGBI( G_SPRITE2D_BASE, F3D_SPRITE2D_BASE, F3D_Sprite2D_Base ); + + GBI_SetGBI( G_TRI1, F3D_TRI1, F3D_Tri1 ); + GBI_SetGBI( G_CULLDL, F3D_CULLDL, F3D_CullDL ); + GBI_SetGBI( G_POPMTX, F3D_POPMTX, F3D_PopMtx ); + GBI_SetGBI( G_MOVEWORD, F3D_MOVEWORD, F3D_MoveWord ); + GBI_SetGBI( G_TEXTURE, F3D_TEXTURE, F3D_Texture ); + GBI_SetGBI( G_SETOTHERMODE_H, F3D_SETOTHERMODE_H, F3D_SetOtherMode_H ); + GBI_SetGBI( G_SETOTHERMODE_L, F3D_SETOTHERMODE_L, F3D_SetOtherMode_L ); + GBI_SetGBI( G_ENDDL, F3D_ENDDL, F3D_EndDL ); + GBI_SetGBI( G_SETGEOMETRYMODE, F3D_SETGEOMETRYMODE, F3D_SetGeometryMode ); + GBI_SetGBI( G_CLEARGEOMETRYMODE, F3D_CLEARGEOMETRYMODE, F3D_ClearGeometryMode ); + GBI_SetGBI( G_QUAD, F3D_QUAD, F3D_Quad ); + GBI_SetGBI( G_RDPHALF_1, F3D_RDPHALF_1, F3D_RDPHalf_1 ); + GBI_SetGBI( G_RDPHALF_2, F3D_RDPHALF_2, F3D_RDPHalf_2 ); + GBI_SetGBI( G_RDPHALF_CONT, F3D_RDPHALF_CONT, F3D_RDPHalf_Cont ); + GBI_SetGBI( G_TRI4, F3D_TRI4, F3D_Tri4 ); + + gSPSetDMAOffsets( 0, 0 ); +} + diff --git a/source/gles2n64/src/F3DPD.h b/source/gles2n64/src/F3DPD.h new file mode 100644 index 0000000..5d7c6e0 --- /dev/null +++ b/source/gles2n64/src/F3DPD.h @@ -0,0 +1,8 @@ +#ifndef F3DPD_H +#define F3DPD_H + +#define F3DPD_VTXCOLORBASE 0x07 + +void F3DPD_Init(); +#endif + diff --git a/source/gles2n64/src/F3DWRUS.cpp 
b/source/gles2n64/src/F3DWRUS.cpp new file mode 100644 index 0000000..8b1ff28 --- /dev/null +++ b/source/gles2n64/src/F3DWRUS.cpp @@ -0,0 +1,73 @@ +#include "gles2N64.h" +#include "Debug.h" +#include "F3D.h" +#include "F3DEX.h" +#include "F3DWRUS.h" +#include "N64.h" +#include "RSP.h" +#include "RDP.h" +#include "gSP.h" +#include "gDP.h" +#include "GBI.h" + + +void F3DWRUS_Vtx( u32 w0, u32 w1 ) /* Vertex command: passes w1 unchanged, bits 9-15 of w0, and bits 16-23 of w0 divided by 5 to gSPVertex. NOTE(review): the division suggests this ucode stores indices scaled by 5 - confirm. */ +{ + gSPVertex( w1, _SHIFTR( w0, 9, 7 ), _SHIFTR( w0, 16, 8 ) / 5 ); +} + +void F3DWRUS_Tri1( u32 w0, u32 w1 ) /* One triangle: the three low byte fields of w1 (bits 16-23, 8-15, 0-7), each divided by 5; w0 is not read. */ +{ + gSP1Triangle( _SHIFTR( w1, 16, 8 ) / 5, + _SHIFTR( w1, 8, 8 ) / 5, + _SHIFTR( w1, 0, 8 ) / 5); +} + +void F3DWRUS_Tri2( u32 w0, u32 w1 ) /* Two triangles: first from the three low bytes of w0, second from the three low bytes of w1, each divided by 5; both trailing arguments are passed as 0. */ +{ + gSP2Triangles( _SHIFTR( w0, 16, 8 ) / 5, _SHIFTR( w0, 8, 8 ) / 5, _SHIFTR( w0, 0, 8 ) / 5, 0, + _SHIFTR( w1, 16, 8 ) / 5, _SHIFTR( w1, 8, 8 ) / 5, _SHIFTR( w1, 0, 8 ) / 5, 0); +} + +void F3DWRUS_Quad( u32 w0, u32 w1 ) /* One quadrangle: all four bytes of w1 (high to low), each divided by 5; w0 is not read. */ +{ + gSP1Quadrangle( _SHIFTR( w1, 24, 8 ) / 5, _SHIFTR( w1, 16, 8 ) / 5, _SHIFTR( w1, 8, 8 ) / 5, _SHIFTR( w1, 0, 8 ) / 5 ); +} + + +void F3DWRUS_Init() /* Sets up the F3DWRUS command table: calls GBI_InitFlags( F3D ), sets GBI.PCStackSize to 10, then registers the F3D handlers through GBI_SetGBI with this file's Vtx, Tri1, Quad and Tri2 handlers substituted. */ +{ + // Set GeometryMode flags + GBI_InitFlags( F3D ); + + GBI.PCStackSize = 10; + + // GBI Command Command Value Command Function + GBI_SetGBI( G_SPNOOP, F3D_SPNOOP, F3D_SPNoOp ); + GBI_SetGBI( G_MTX, F3D_MTX, F3D_Mtx ); + GBI_SetGBI( G_RESERVED0, F3D_RESERVED0, F3D_Reserved0 ); + GBI_SetGBI( G_MOVEMEM, F3D_MOVEMEM, F3D_MoveMem ); + GBI_SetGBI( G_VTX, F3D_VTX, F3DWRUS_Vtx ); /* F3D opcode value, F3DWRUS handler */ + GBI_SetGBI( G_RESERVED1, F3D_RESERVED1, F3D_Reserved1 ); + GBI_SetGBI( G_DL, F3D_DL, F3D_DList ); + GBI_SetGBI( G_RESERVED2, F3D_RESERVED2, F3D_Reserved2 ); + GBI_SetGBI( G_RESERVED3, F3D_RESERVED3, F3D_Reserved3 ); + GBI_SetGBI( G_SPRITE2D_BASE, F3D_SPRITE2D_BASE, F3D_Sprite2D_Base ); + + GBI_SetGBI( G_TRI1, F3D_TRI1, F3DWRUS_Tri1 ); /* F3D opcode value, F3DWRUS handler */ + GBI_SetGBI( G_CULLDL, F3D_CULLDL, F3D_CullDL ); + GBI_SetGBI( G_POPMTX, F3D_POPMTX, F3D_PopMtx ); + GBI_SetGBI( G_MOVEWORD, F3D_MOVEWORD, F3D_MoveWord ); + GBI_SetGBI( G_TEXTURE, F3D_TEXTURE, F3D_Texture ); + GBI_SetGBI( G_SETOTHERMODE_H, 
F3D_SETOTHERMODE_H, F3D_SetOtherMode_H ); + GBI_SetGBI( G_SETOTHERMODE_L, F3D_SETOTHERMODE_L, F3D_SetOtherMode_L ); + GBI_SetGBI( G_ENDDL, F3D_ENDDL, F3D_EndDL ); + GBI_SetGBI( G_SETGEOMETRYMODE, F3D_SETGEOMETRYMODE, F3D_SetGeometryMode ); + GBI_SetGBI( G_CLEARGEOMETRYMODE, F3D_CLEARGEOMETRYMODE, F3D_ClearGeometryMode ); + GBI_SetGBI( G_QUAD, F3D_QUAD, F3DWRUS_Quad ); /* F3D opcode value, F3DWRUS handler */ + GBI_SetGBI( G_RDPHALF_1, F3D_RDPHALF_1, F3D_RDPHalf_1 ); + GBI_SetGBI( G_RDPHALF_2, F3D_RDPHALF_2, F3D_RDPHalf_2 ); + GBI_SetGBI( G_RDPHALF_CONT, F3D_RDPHALF_CONT, F3D_RDPHalf_Cont ); + GBI_SetGBI( G_TRI2, F3DWRUS_TRI2, F3DWRUS_Tri2 ); /* F3DWRUS_TRI2 is 0xB1, defined in F3DWRUS.h */ +} + diff --git a/source/gles2n64/src/F3DWRUS.h b/source/gles2n64/src/F3DWRUS.h new file mode 100644 index 0000000..f38de7e --- /dev/null +++ b/source/gles2n64/src/F3DWRUS.h @@ -0,0 +1,8 @@ +#ifndef F3DWRUS_H +#define F3DWRUS_H + +#define F3DWRUS_TRI2 0xB1 +void F3DWRUS_Init(); + +#endif + diff --git a/source/gles2n64/src/FrameSkipper.cpp b/source/gles2n64/src/FrameSkipper.cpp new file mode 100644 index 0000000..84c3428 --- /dev/null +++ b/source/gles2n64/src/FrameSkipper.cpp @@ -0,0 +1,61 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Copyright (C) 2011 yongzh (freeman.yong@gmail.com) * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. 
* + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +#include "FrameSkipper.h" +#include "ticks.h" + +FrameSkipper::FrameSkipper() /* Defaults: AUTO skipping, at most 2 skipped frames, 60 fps target. */ + : skipType(AUTO), maxSkips(2), targetFPS(60), skipCounter(0), initialTicks(0), virtualCount(0) /* counters are zeroed too, so willSkipNext() is well-defined even before start() */ +{ +} + +void FrameSkipper::start() /* Resets the timing state; the next update() re-seeds initialTicks. */ +{ + initialTicks = 0; + virtualCount = 0; + skipCounter = 0; +} + +void FrameSkipper::update() /* Called once per frame: recomputes skipCounter, which willSkipNext() reads. */ +{ + // for the first frame + if (initialTicks == 0) { + initialTicks = ticksGetTicks(); + return; + } + + unsigned int elapsed = ticksGetTicks() - initialTicks; + unsigned int realCount = (unsigned int)((unsigned long long)elapsed * targetFPS / 1000); /* frames due at targetFPS; 64-bit product because elapsed * targetFPS wraps 32 bits after about 19.9 hours at 60 fps */ + + virtualCount++; /* frames actually processed so far */ + if (realCount >= virtualCount) { + if (realCount > virtualCount && + skipType == AUTO && skipCounter < maxSkips) { + skipCounter++; /* behind schedule: request another skipped frame, up to maxSkips */ + } else { + virtualCount = realCount; /* caught up or skip budget used: resynchronise */ + if (skipType == AUTO) + skipCounter = 0; + } + } + if (skipType == MANUAL) { /* MANUAL: cycle skipCounter through 0..maxSkips regardless of timing */ + if (++skipCounter > maxSkips) + skipCounter = 0; + } +} diff --git a/source/gles2n64/src/FrameSkipper.h b/source/gles2n64/src/FrameSkipper.h new file mode 100644 index 0000000..2b2ccb4 --- /dev/null +++ b/source/gles2n64/src/FrameSkipper.h @@ -0,0 +1,55 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Copyright (C) 2011 yongzh (freeman.yong@gmail.com) * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. 
* + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +#ifndef FRAME_SKIPPER_H +#define FRAME_SKIPPER_H + +class FrameSkipper { +public: + enum { AUTO, MANUAL }; + + FrameSkipper(); + + void setSkips(int type, int max) { + skipType = type; + maxSkips = max; + } + + void setTargetFPS(int fps) { + targetFPS = fps; + } + + bool willSkipNext() { + return (skipCounter > 0); + } + + void start(); + void update(); + +private: + int skipType; + int maxSkips; + int targetFPS; + int skipCounter; + unsigned int initialTicks; + unsigned int virtualCount; +}; + +#endif + diff --git a/source/gles2n64/src/GBI.cpp b/source/gles2n64/src/GBI.cpp new file mode 100644 index 0000000..daa6de7 --- /dev/null +++ b/source/gles2n64/src/GBI.cpp @@ -0,0 +1,975 @@ +#include +#include +#include +#include "gles2N64.h" +#include "GBI.h" +#include "RDP.h" +#include "RSP.h" +#include "F3D.h" +#include "F3DEX.h" +#include "F3DEX2.h" +#include "L3D.h" +#include "L3DEX.h" +#include "L3DEX2.h" +#include "S2DEX.h" +#include "S2DEX2.h" +#include "F3DDKR.h" +#include "F3DWRUS.h" +#include "F3DPD.h" +#include "F3DCBFD.h" +#include "Types.h" +# include +# include +# include +# include "convert.h" +#include "Common.h" +#include "ticks.h" + +#include "CRC.h" +#include "Debug.h" + +u32 uc_crc, uc_dcrc; +char uc_str[256]; + +SpecialMicrocodeInfo specialMicrocodes[] = +{ + {F3DWRUS, FALSE, 0xd17906e2, "RSP SW Version: 2.0D, 04-01-96"}, + {F3DWRUS, FALSE, 0x94c4c833, "RSP SW Version: 2.0D, 04-01-96"}, + {S2DEX, FALSE, 0x9df31081, "RSP Gfx ucode S2DEX 1.06 Yoshitaka Yasumoto Nintendo."}, + {F3DDKR, FALSE, 0x8d91244f, "Diddy Kong Racing"}, + {F3DDKR, FALSE, 0x6e6fc893, "Diddy Kong Racing"}, + {F3DDKR, FALSE, 0xbde9d1fb, "Jet Force Gemini"}, + {F3DPD, 
FALSE, 0x1c4f7869, "Perfect Dark"}, + {F3DEX, FALSE, 0x0ace4c3f, "Mario Kart"}, + //{F3DEX, FALSE, 0xda51ccdb, "Rogue Squadron"}, + //{F3DCBFD, FALSE, 0x1b4ace88, "RSP Gfx ucode F3DEXBG.NoN fifo 2.08 Yoshitaka Yasumoto 1999 Nintendo."}, +}; + +u32 G_RDPHALF_1, G_RDPHALF_2, G_RDPHALF_CONT; +u32 G_SPNOOP; +u32 G_SETOTHERMODE_H, G_SETOTHERMODE_L; +u32 G_DL, G_ENDDL, G_CULLDL, G_BRANCH_Z; +u32 G_LOAD_UCODE; +u32 G_MOVEMEM, G_MOVEWORD; +u32 G_MTX, G_POPMTX; +u32 G_GEOMETRYMODE, G_SETGEOMETRYMODE, G_CLEARGEOMETRYMODE; +u32 G_TEXTURE; +u32 G_DMA_IO, G_DMA_DL, G_DMA_TRI, G_DMA_MTX, G_DMA_VTX, G_DMA_OFFSETS; +u32 G_SPECIAL_1, G_SPECIAL_2, G_SPECIAL_3; +u32 G_VTX, G_MODIFYVTX, G_VTXCOLORBASE; +u32 G_TRI1, G_TRI2, G_TRI4; +u32 G_QUAD, G_LINE3D; +u32 G_RESERVED0, G_RESERVED1, G_RESERVED2, G_RESERVED3; +u32 G_SPRITE2D_BASE; +u32 G_BG_1CYC, G_BG_COPY; +u32 G_OBJ_RECTANGLE, G_OBJ_SPRITE, G_OBJ_MOVEMEM; +u32 G_SELECT_DL, G_OBJ_RENDERMODE, G_OBJ_RECTANGLE_R; +u32 G_OBJ_LOADTXTR, G_OBJ_LDTX_SPRITE, G_OBJ_LDTX_RECT, G_OBJ_LDTX_RECT_R; +u32 G_RDPHALF_0, G_TRI_UNKNOWN; + +u32 G_MTX_STACKSIZE; +u32 G_MTX_MODELVIEW; +u32 G_MTX_PROJECTION; +u32 G_MTX_MUL; +u32 G_MTX_LOAD; +u32 G_MTX_NOPUSH; +u32 G_MTX_PUSH; + +u32 G_TEXTURE_ENABLE; +u32 G_SHADING_SMOOTH; +u32 G_CULL_FRONT; +u32 G_CULL_BACK; +u32 G_CULL_BOTH; +u32 G_CLIPPING; + +u32 G_MV_VIEWPORT; + +u32 G_MWO_aLIGHT_1, G_MWO_bLIGHT_1; +u32 G_MWO_aLIGHT_2, G_MWO_bLIGHT_2; +u32 G_MWO_aLIGHT_3, G_MWO_bLIGHT_3; +u32 G_MWO_aLIGHT_4, G_MWO_bLIGHT_4; +u32 G_MWO_aLIGHT_5, G_MWO_bLIGHT_5; +u32 G_MWO_aLIGHT_6, G_MWO_bLIGHT_6; +u32 G_MWO_aLIGHT_7, G_MWO_bLIGHT_7; +u32 G_MWO_aLIGHT_8, G_MWO_bLIGHT_8; + +//GBIFunc GBICmd[256]; +GBIInfo GBI; + +void GBI_Unknown( u32 w0, u32 w1 ) +{ +} + +#if 0 +INT_PTR CALLBACK MicrocodeDlgProc( HWND hWndDlg, UINT uMsg, WPARAM wParam, LPARAM lParam ) +{ + switch (uMsg) + { + case WM_INITDIALOG: + for (int i = 0; i < numMicrocodeTypes; i++) + { + SendDlgItemMessage( hWndDlg, IDC_MICROCODE, CB_ADDSTRING, 0, 
(LPARAM)MicrocodeTypes[i] ); + } + SendDlgItemMessage( hWndDlg, IDC_MICROCODE, CB_SETCURSEL, 0, 0 ); + + char text[1024]; + sprintf( text, "Microcode CRC:\t\t0x%08x\r\nMicrocode Data CRC:\t0x%08x\r\nMicrocode Text:\t\t%s", uc_crc, uc_dcrc, uc_str ); + SendDlgItemMessage( hWndDlg, IDC_TEXTBOX, WM_SETTEXT, NULL, (LPARAM)text ); + return TRUE; + + case WM_COMMAND: + switch (LOWORD(wParam)) + { + case IDOK: + EndDialog( hWndDlg, SendDlgItemMessage( hWndDlg, IDC_MICROCODE, CB_GETCURSEL, 0, 0 ) ); + return TRUE; + + case IDCANCEL: + EndDialog( hWndDlg, NONE ); + return TRUE; + } + break; + } + + return FALSE; +} +#elif defined(USE_GTK) +static int selectedMicrocode = -1; +static GtkWidget *microcodeWindow = 0; +static GtkWidget *microcodeList = 0; + +static void okButton_clicked( GtkWidget *widget, void *data ) +{ + gtk_widget_hide( microcodeWindow ); + if (GTK_LIST(microcodeList)->selection != 0) + { + char *text = 0; + GtkListItem *item = GTK_LIST_ITEM(GTK_LIST(microcodeList)->selection->data); + GtkLabel *label = GTK_LABEL(GTK_BIN(item)->child); + gtk_label_get( label, &text ); + if (text != 0) + for (int i = 0; i < numMicrocodeTypes; i++) + if (!strcmp( text, MicrocodeTypes[i] )) + { + selectedMicrocode = i; + return; + } + } + + selectedMicrocode = NONE; +} + +static void stopButton_clicked( GtkWidget *widget, void *data ) +{ + gtk_widget_hide( microcodeWindow ); + selectedMicrocode = NONE; +} + +static gint +delete_question_event(GtkWidget *widget, GdkEvent *event, gpointer data) +{ + return TRUE; // undeleteable +} + +static int MicrocodeDialog() +{ + GtkWidget *infoLabel; + GtkWidget *infoFrame, *infoTable; + GtkWidget *crcInfoLabel, *crcDataInfoLabel, *textInfoLabel; + GtkWidget *crcLabel = NULL, *crcDataLabel = NULL, *textLabel = NULL; + GtkWidget *selectUcodeLabel; + //GtkWidget *microcodeLabel; + GtkWidget *okButton, *stopButton; + GList *ucodeList = 0; + char buf[1024]; + + if (!g_thread_supported()) + g_thread_init( NULL ); + gdk_threads_enter(); + + // 
create dialog + if (microcodeWindow == 0) + { + microcodeWindow = gtk_dialog_new(); + gtk_signal_connect( GTK_OBJECT(microcodeWindow), "delete_event", + GTK_SIGNAL_FUNC(delete_question_event), (gpointer)NULL ); + sprintf( buf, "%s - unknown microcode", pluginName ); + gtk_window_set_title( GTK_WINDOW(microcodeWindow), buf ); + gtk_container_set_border_width( GTK_CONTAINER(GTK_DIALOG(microcodeWindow)->vbox), 11 ); + + // ok button + okButton = gtk_button_new_with_label( "Ok" ); + gtk_signal_connect_object( GTK_OBJECT(okButton), "clicked", + GTK_SIGNAL_FUNC(okButton_clicked), NULL ); + gtk_container_add( GTK_CONTAINER(GTK_DIALOG(microcodeWindow)->action_area), okButton ); + + // stop button + stopButton = gtk_button_new_with_label( "Stop" ); + gtk_signal_connect_object( GTK_OBJECT(stopButton), "clicked", + GTK_SIGNAL_FUNC(stopButton_clicked), NULL ); + gtk_container_add( GTK_CONTAINER(GTK_DIALOG(microcodeWindow)->action_area), stopButton ); + + // info label + infoLabel = gtk_label_new( "Unknown microcode. 
Please notify Orkin, including the following information:" ); + gtk_box_pack_start_defaults( GTK_BOX(GTK_DIALOG(microcodeWindow)->vbox), infoLabel ); + + // info frame + infoFrame = gtk_frame_new( "Microcode info" ); + gtk_container_set_border_width( GTK_CONTAINER(infoFrame), 7 ); + gtk_box_pack_start_defaults( GTK_BOX(GTK_DIALOG(microcodeWindow)->vbox), infoFrame ); + + infoTable = gtk_table_new( 3, 2, FALSE ); + gtk_container_set_border_width( GTK_CONTAINER(infoTable), 7 ); + gtk_table_set_col_spacings( GTK_TABLE(infoTable), 3 ); + gtk_table_set_row_spacings( GTK_TABLE(infoTable), 3 ); + gtk_container_add( GTK_CONTAINER(infoFrame), infoTable ); + + crcInfoLabel = gtk_label_new( "Microcode CRC:" ); + crcDataInfoLabel = gtk_label_new( "Microcode Data CRC:" ); + textInfoLabel = gtk_label_new( "Microcode Text:" ); + + crcLabel = gtk_label_new( "" ); + crcDataLabel = gtk_label_new( "" ); + textLabel = gtk_label_new( "" ); + + gtk_table_attach_defaults( GTK_TABLE(infoTable), crcInfoLabel, 0, 1, 0, 1 ); + gtk_table_attach_defaults( GTK_TABLE(infoTable), crcLabel, 1, 2, 0, 1 ); + gtk_table_attach_defaults( GTK_TABLE(infoTable), crcDataInfoLabel, 0, 1, 1, 2 ); + gtk_table_attach_defaults( GTK_TABLE(infoTable), crcDataLabel, 1, 2, 1, 2 ); + gtk_table_attach_defaults( GTK_TABLE(infoTable), textInfoLabel, 0, 1, 2, 3 ); + gtk_table_attach_defaults( GTK_TABLE(infoTable), textLabel, 1, 2, 2, 3 ); + + selectUcodeLabel = gtk_label_new( "You can manually select the closest matching microcode." 
); + for (int i = 0; i < numMicrocodeTypes; i++) + ucodeList = g_list_append( ucodeList, gtk_list_item_new_with_label( MicrocodeTypes[i] ) ); + microcodeList = gtk_list_new(); + gtk_list_set_selection_mode( GTK_LIST(microcodeList), GTK_SELECTION_SINGLE ); + gtk_list_append_items( GTK_LIST(microcodeList), ucodeList ); + + gtk_box_pack_start_defaults( GTK_BOX(GTK_DIALOG(microcodeWindow)->vbox), selectUcodeLabel ); + gtk_box_pack_start_defaults( GTK_BOX(GTK_DIALOG(microcodeWindow)->vbox), microcodeList ); + } + + snprintf( buf, 1024, "0x%8.8X", (unsigned int)uc_crc ); + if(crcLabel) gtk_label_set_text( GTK_LABEL(crcLabel), buf ); + snprintf( buf, 1024, "0x%8.8X", (unsigned int)uc_dcrc ); + if(crcDataLabel) gtk_label_set_text( GTK_LABEL(crcDataLabel), buf ); + if(textLabel) gtk_label_set_text( GTK_LABEL(textLabel), uc_str ); + + selectedMicrocode = -1; + gtk_widget_show_all( microcodeWindow ); + + while (selectedMicrocode == -1) + { + if( gtk_main_iteration() ) + break; + usleep( 10000 ); + } + gdk_threads_leave(); + + return selectedMicrocode; +} +#else +static int MicrocodeDialog() +{ + // FIXME + return 0; +} +#endif + +MicrocodeInfo *GBI_AddMicrocode() +{ + MicrocodeInfo *newtop = (MicrocodeInfo*)malloc( sizeof( MicrocodeInfo ) ); + + newtop->lower = GBI.top; + newtop->higher = NULL; + + if (GBI.top) + GBI.top->higher = newtop; + + if (!GBI.bottom) + GBI.bottom = newtop; + + GBI.top = newtop; + + GBI.numMicrocodes++; + + + return newtop; +} + +void GBI_Init() +{ + GBI.top = NULL; + GBI.bottom = NULL; + GBI.current = NULL; + GBI.numMicrocodes = 0; + + for (u32 i = 0; i <= 0xFF; i++) + GBI.cmd[i] = GBI_Unknown; + +#ifdef PROFILE_GBI + GBI_ProfileInit(); +#endif +} + +void GBI_Destroy() +{ + while (GBI.bottom) + { + MicrocodeInfo *newBottom = GBI.bottom->higher; + + if (GBI.bottom == GBI.top) + GBI.top = NULL; + + free( GBI.bottom ); + + GBI.bottom = newBottom; + + if (GBI.bottom) + GBI.bottom->lower = NULL; + + GBI.numMicrocodes--; + } +} + +#ifdef PROFILE_GBI +void 
GBI_ProfileInit() +{ + GBI_ProfileReset(); +} + +void GBI_ProfileBegin(u32 cmd) +{ + GBI.profileTmp = ticksGetTicks(); +} + +void GBI_ProfileEnd(u32 cmd) +{ + unsigned int i = 256*GBI.current->type + cmd; + GBI.profileNum[i]++; + GBI.profileTimer[i] += ticksGetTicks() - GBI.profileTmp; +} + +void +GBI_ProfileReset() +{ + memset(GBI.profileTimer, 0, 12 * 256 * sizeof(int)); + memset(GBI.profileNum, 0, 12 * 256 * sizeof(int)); +} + +u32 +GBI_GetFuncTime(u32 ucode, u32 cmd) +{ + return GBI.profileTimer[ucode*256+cmd]; +} + +u32 +GBI_GetFuncNum(u32 ucode, u32 cmd) +{ + return GBI.profileNum[ucode*256+cmd]; +} + +u32 +GBI_ProfilePrint(FILE *file) +{ + int uc, cmd, total=0; + + for(uc=0;uc<12;uc++) + { + for(cmd=0;cmd<256;cmd++) + { + total += GBI_GetFuncTime(uc, cmd); + } + } + + + for(uc=0;uc<12;uc++) + { + for(cmd=0;cmd<256;cmd++) + { + unsigned int t = GBI_GetFuncTime(uc, cmd); + if (t != 0) + { + fprintf(file, "%s x %i = %u ms (%.2f%%)\n", GBI_GetFuncName(uc,cmd), GBI_GetFuncNum(uc, cmd), t, 100.0f * (float)t / total); + } + } + } + return total; +} + +const char* +GBI_GetUcodeName(u32 ucode) +{ + switch(ucode) + { + case F3D: return "F3D"; + case F3DEX: return "F3DEX"; + case F3DEX2: return "F3DEX2"; + case L3D: return "L3D"; + case L3DEX: return "L3DEX"; + case L3DEX2: return "L3DEX2"; + case S2DEX: return "S2DEX"; + case S2DEX2: return "S2DEX2"; + case F3DPD: return "F3DPD"; + case F3DDKR: return "F3DDKR"; + case F3DWRUS: return "F3DWRUS"; + case NONE: return "NONE"; + default: return "UNKNOWN UCODE"; + } +} + +const char* +GBI_GetFuncName(unsigned int ucode, unsigned int cmd) +{ + switch(cmd) + { + //common + case G_SETCIMG: return "G_SETCIMG"; + case G_SETZIMG: return "G_SETZIMG"; + case G_SETTIMG: return "G_SETTIMG"; + case G_SETCOMBINE: return "G_SETCOMBINE"; + case G_SETENVCOLOR: return "G_SETENVCOLOR"; + case G_SETPRIMCOLOR: return "G_SETPRIMCOLOR"; + case G_SETBLENDCOLOR: return "G_SETBLENDCOLOR"; + case G_SETFOGCOLOR: return "G_SETFOGCOLOR"; + case 
G_SETFILLCOLOR: return "G_SETFILLCOLOR"; + case G_FILLRECT: return "G_FILLRECT"; + case G_SETTILE: return "G_SETTILE"; + case G_LOADTILE: return "G_LOADTILE"; + case G_LOADBLOCK: return "G_LOADBLOCK"; + case G_SETTILESIZE: return "G_SETTILESIZE"; + case G_LOADTLUT: return "G_LOADTLUT"; + case G_RDPSETOTHERMODE: return "G_RDPSETOTHERMODE"; + case G_SETPRIMDEPTH: return "G_SETPRIMDEPTH"; + case G_SETSCISSOR: return "G_SETSCISSOR"; + case G_SETCONVERT: return "G_SETCONVERT"; + case G_SETKEYR: return "G_SETKEYR"; + case G_SETKEYGB: return "G_SETKEYGB"; + case G_RDPFULLSYNC: return "G_RDPFULLSYNC"; + case G_RDPTILESYNC: return "G_RDPTILESYNC"; + case G_RDPPIPESYNC: return "G_RDPPIPESYNC"; + case G_RDPLOADSYNC: return "G_RDPLOADSYNC"; + case G_TEXRECTFLIP: return "G_TEXRECTFLIP"; + + //ucode + case 0x00: return "SPNOOP"; + +/* + F3D_MTX: 0x01 + F3DEX2_VTX 0x01 + F3DDKR_DMA_MTX 0x01 + S2DEX_BG_1CYC 0x01 + S2DEX2_OBJ_RECTANGLE 0x01 +*/ + case 0x01: + { + switch(ucode) + { + case F3DEX2: return "F3DEX2_VTX"; + case F3DDKR: return "F3DDKR_DMA_MTX"; + case S2DEX: return "S2DEX_BG_1CYC"; + case S2DEX2: return "S2DEX2_OBJ_RECTANGLE"; + default: return "F3D_MTX"; + } + } +/* + F3D_RESERVED0: 0x02 + F3DEX2_MODIFYVTX 0x02 + S2DEX_BG_COPY 0x02 + S2DEX2_OBJ_SPRITE 0x02 +*/ + case 0x02: + { + switch(ucode) + { + case F3DEX2: return "F3DEX2_MODIFYVTX"; + case S2DEX: return "S2DEX_BG_COPY"; + case S2DEX2: return "S2DEX2_OBJ_SPRITE"; + default: return "F3D_RESERVED0"; + } + } +/* + F3D_MOVEMEM: 0x03 + F3DEX2_CULLDL 0x03 + S2DEX_OBJ_RECTANGLE 0x03 +*/ + case 0x03: + { + switch(ucode) + { + case F3DEX2: return "F3DEX2_CULLDL"; + case S2DEX: return "S2DEX_OBJ_RECTANGLE"; + default: return "F3D_MOVEMEM"; + } + } +/* + F3D_VTX: 0x04 + F3DEX2_BRANCH_Z 0x04 + F3DDKR_DMA_VTX 0x04 + S2DEX_OBJ_SPRITE 0x04 + S2DEX2_SELECT_DL 0x04 +*/ + case 0x04: + { + switch(ucode) + { + case F3DEX2: return "F3DEX2_BRANCH_Z"; + case F3DDKR: return "F3DDKR_DMA_VTX"; + case S2DEX: return "S2DEX_OBJ_SPRITE"; + case 
S2DEX2: return "S2DEX2_SELECT_DL"; + default: return "F3D_VTX"; + } + } + +/* + F3D_RESERVED1: 0x05 + F3DEX2_TRI1 0x05 + F3DDKR_DMA_TRI 0x05 + S2DEX_OBJ_MOVEMEM 0x05 + S2DEX2_OBJ_LOADTXTR 0x05 +*/ + case 0x05: + { + switch(ucode) + { + case F3DEX2: return "F3DEX2_TRI1"; /* label fixed from "F3DEX2_TR1" to match the F3DEX2_TRI1 opcode name listed above */ + case F3DDKR: return "F3DDKR_DMA_TRI"; + case S2DEX: return "S2DEX_OBJ_MOVEMEM"; + case S2DEX2: return "S2DEX2_OBJ_LOADTXTR"; + default: return "F3D_RESERVED1"; + } + } +/* + F3D_DL: 0x06 + F3DEX2_TRI2 0x06 + S2DEX2_OBJ_LDTX_SPRITE 0x06 +*/ + case 0x06: + { + switch(ucode) + { + case F3DEX2: return "F3DEX2_TRI2"; /* label fixed from "F3DEX2_TR2" to match the F3DEX2_TRI2 opcode name listed above */ + case S2DEX2: return "S2DEX2_OBJ_LDTX_SPRITE"; + default: return "F3D_DL"; + } + } + +/* + F3D_RESERVED2: 0x07 + F3DEX2_QUAD 0x07 + F3DPD_VTXCOLORBASE 0x07 + F3DDKR_DMA_DL 0x07 + S2DEX2_OBJ_LDTX_RECT 0x07 +*/ + case 0x07: + { + switch(ucode) + { + case F3DEX2: return "F3DEX2_QUAD"; + case F3DPD: return "F3DPD_VTXCOLORBASE"; + case F3DDKR: return "F3DDKR_DMA_DL"; + case S2DEX2: return "S2DEX2_OBJ_LDTX_RECT"; + default: return "F3D_RESERVED2"; + } + } +/* + F3D_RESERVED3: 0x08 + L3DEX2_LINE3D 0x08 + S2DEX2_OBJ_LDTX_RECT_R 0x08 +*/ + case 0x08: + { + switch(ucode) + { + case L3DEX2: return "L3DEX2_LINE3D"; + case S2DEX2: return "S2DEX2_OBJ_LDTX_RECT_R"; + default: return "F3D_RESERVED3"; + } + } + +/* + F3D_SPRITE2D_BASE: 0x09 + S2DEX2_BG_1CYC 0x09 +*/ + case 0x09: + { + switch(ucode) + { + case S2DEX2: return "S2DEX2_BG_1CYC"; + default: return "F3D_SPRITE2D_BASE"; + } + } + +// S2DEX2_BG_COPY 0x0A + case 0x0A: return "S2DEX2_BG_COPY"; +// S2DEX2_OBJ_RENDERMODE 0x0B + case 0x0B: return "S2DEX2_OBJ_RENDERMODE"; +// F3DEX2_RDPHALF_2 0xF1 + case 0xF1: return "F3DEX2_RDPHALF_2"; +/* + S2DEX_RDPHALF_0 0xE4 + S2DEX2_RDPHALF_0 0xE4 +*/ + case 0xE4: + { + switch(ucode) + { + case S2DEX: return "S2DEX_RDPHALF_0"; + case S2DEX2: return "S2DEX2_RDPHALF_0"; + default: return "G_TEXRECT"; + } + } +// F3DEX2_SETOTHERMODE_H 0xE3 + case 0xE3: return "F3DEX2_SETOTHERMODE_H"; +// F3DEX2_SETOTHERMODE_L 0xE2 + 
case 0xE2: return "F3DEX2_SETOTHERMODE_L"; +// F3DEX2_RDPHALF_1 0xE1 + case 0xE1: return "F3DEX2_RDPHALF_1"; +// F3DEX2_SPNOOP 0xE0 + case 0xE0: return "F3DEX2_SPNOOP"; +// F3DEX2_ENDDL 0xDF + case 0xDF: return "F3DEX2_ENDDL"; +// F3DEX2_DL 0xDE + case 0xDE: return "F3DEX2_DL"; +// F3DEX2_LOAD_UCODE 0xDD + case 0xDD: return "F3DEX2_LOAD_UCODE"; +/* + F3DEX2_MOVEMEM 0xDC + S2DEX2_OBJ_MOVEMEM 0xDC +*/ + case 0xDC: + { + switch(ucode) + { + case S2DEX2: return "S2DEX2_OBJ_MOVEMEM"; + default: return "F3DEX2_MOVEMEM"; + } + } +// F3DEX2_MOVEWORD 0xDB + case 0xDB: return "F3DEX2_MOVEWORD"; +/* + F3DEX2_MTX 0xDA + S2DEX2_OBJ_RECTANGLE_R 0xDA +*/ + case 0xDA: + { + switch(ucode) + { + case S2DEX2: return "S2DEX2_OBJ_RECTANGLE_R"; + default: return "F3DEX2_MTX"; + } + } +// F3DEX2_GEOMETRYMODE 0xD9 + case 0xD9: return "F3DEX2_GEOMETRYMODE"; +// F3DEX2_POPMTX 0xD8 + case 0xD8: return "F3DEX2_POPMTX"; +// F3DEX2_TEXTURE 0xD7 + case 0xD7: return "F3DEX2_TEXTURE"; +// F3DEX2_DMA_IO 0xD6 + case 0xD6: return "F3DEX2_DMA_IO"; +// F3DEX2_SPECIAL_1 0xD5 + case 0xD5: return "F3DEX2_SPECIAL_1"; +// F3DEX2_SPECIAL_2 0xD4 + case 0xD4: return "F3DEX2_SPECIAL_2"; +// F3DEX2_SPECIAL_3 0xD3 + case 0xD3: return "F3DEX2_SPECIAL_3"; + +// S2DEX_OBJ_LOADTXTR 0xC1 + case 0xC1: return "S2DEX_OBJ_LOADTXTR"; +// S2DEX_OBJ_LDTX_SPRITE 0xC2 + case 0xC2: return "S2DEX_OBJ_LDTX_SPRITE"; +// S2DEX_OBJ_LDTX_RECT 0xC3 + case 0xC3: return "S2DEX_OBJ_LDTX_RECT"; +// S2DEX_OBJ_LDTX_RECT_R 0xC4 + case 0xC4: return "S2DEX_OBJ_LDTX_RECT_R"; +/* + F3D_TRI1: 0xBF + F3DDKR_DMA_OFFSETS 0xBF +*/ + case 0xBF: + { + switch(ucode) + { + case F3DDKR: return "F3DDKR_DMA_OFFSETS"; + default: return "F3D_TRI1"; + } + } + +// F3D_CULLDL: 0xBE + case 0xBE: return "F3D_CULLDL"; +// F3D_POPMTX: 0xBD + case 0xBD: return "F3D_POPMTX"; +// F3D_MOVEWORD: 0xBC + case 0xBC: return "F3D_MOVEWORD"; +// F3D_TEXTURE: 0xBB + case 0xBB: return "F3D_TEXTURE"; +// F3D_SETOTHERMODE_H: 0xBA + case 0xBA: return "F3D_SETOTHERMODE_H"; +// 
F3D_SETOTHERMODE_L: 0xB9 + case 0xB9: return "F3D_SETOTHERMODE_L"; +// F3D_ENDDL: 0xB8 + case 0xB8: return "F3D_ENDDL"; +// F3D_SETGEOMETRYMODE: 0xB7 + case 0xB7: return "F3D_SETGEOMETRYMODE"; +// F3D_CLEARGEOMETRYMODE: 0xB6 + case 0xB6: return "F3D_CLEARGEOMETRYMODE"; +/* + F3D_QUAD: 0xB5 + L3D_LINE3D 0xB5 +*/ + case 0xB5: + { + switch(ucode) + { + case L3D: return "L3D_LINE3D"; + default: return "F3D_QUAD"; + } + } + +// F3D_RDPHALF_1: 0xB4 + case 0xB4: return "F3D_RDPHALF_1"; +// F3D_RDPHALF_2: 0xB3 + case 0xB3: return "F3D_RDPHALF_2"; +/* + F3D_RDPHALF_CONT: 0xB2 + F3DEX_MODIFYVTX 0xB2 + S2DEX_OBJ_RECTANGLE_R 0xB2 +*/ + case 0xB2: + { + switch(ucode) + { + case F3DEX: return "F3DEX_MODIFYVTX"; + case S2DEX: return "S2DEX_OBJ_RECTANGLE_R"; + default: return "F3D_RDPHALF_CONT"; + } + } +/* + F3D_TRI4: 0xB1 + F3DEX_TRI2 0xB1 + F3DWRUS_TRI2 0xB1 + S2DEX_OBJ_RENDERMODE 0xB1 +*/ + case 0xB1: + { + switch(ucode) + { + case F3DEX: return "F3DEX_TRI2"; + case F3DWRUS: return "F3DWRUS_TRI2"; + case S2DEX: return "S2DEX_OBJ_RENDERMODE"; + default: return "F3D_TRI4"; + } + } +/* + F3DEX_BRANCH_Z 0xB0 + S2DEX_SELECT_DL 0xB0 +*/ + case 0xB0: + { + switch(ucode) + { + case S2DEX: return "S2DEX_SELECT_DL"; + default: return "F3DEX_BRANCH_Z"; + } + } +/* + F3DEX_LOAD_UCODE 0xAF + S2DEX_LOAD_UCODE 0xAF +*/ + case 0xAF: + { + switch(ucode) + { + case S2DEX: return "S2DEX_LOAD_UCODE"; + default: return "F3DEX_LOAD_UCODE"; + } + } + + default: + { + if (ucode == F3DCBFD) + { + if (cmd >= 0x10 && cmd <= 0x1f) + return "F3DCBFD_TRI4"; + + } + return "UNKNOWN CMD"; + } + } +} +#endif + +MicrocodeInfo *GBI_DetectMicrocode( u32 uc_start, u32 uc_dstart, u16 uc_dsize ) +{ + MicrocodeInfo *current; + + for (unsigned int i = 0; i < GBI.numMicrocodes; i++) + { + current = GBI.top; + + while (current) + { + if ((current->address == uc_start) && (current->dataAddress == uc_dstart) && (current->dataSize == uc_dsize)) + return current; + + current = current->lower; + } + } + + current = 
GBI_AddMicrocode(); + + current->address = uc_start; + current->dataAddress = uc_dstart; + current->dataSize = uc_dsize; + current->NoN = FALSE; + current->type = NONE; + + // See if we can identify it by CRC + uc_crc = CRC_Calculate( 0xFFFFFFFF, &RDRAM[uc_start & 0x1FFFFFFF], 4096); + LOG(LOG_MINIMAL, "UCODE CRC=0x%x\n", uc_crc); + + for (u32 i = 0; i < sizeof( specialMicrocodes ) / sizeof( SpecialMicrocodeInfo ); i++) + { + if (uc_crc == specialMicrocodes[i].crc) + { + current->type = specialMicrocodes[i].type; + return current; + } + } + + // See if we can identify it by text + char uc_data[2048]; + UnswapCopy( &RDRAM[uc_dstart & 0x1FFFFFFF], uc_data, 2048 ); + strcpy( uc_str, "Not Found" ); /* value left in uc_str when no "RSP" signature is found below */ + + for (u32 i = 0; i + 2 < 2048; i++) /* stop two bytes early: the match below reads uc_data[i+1] and uc_data[i+2] */ + { + if ((uc_data[i] == 'R') && (uc_data[i+1] == 'S') && (uc_data[i+2] == 'P')) + { + u32 j = 0; + while ((i + j < 2048) && (j + 1 < sizeof( uc_str )) && (uc_data[i+j] > 0x0A)) /* copy is bounded by both buffers: never read past uc_data, always leave room in uc_str for the terminator */ + { + uc_str[j] = uc_data[i+j]; + j++; + } + + uc_str[j] = 0x00; + + int type = NONE; + + if (strncmp( &uc_str[4], "SW", 2 ) == 0) + { + type = F3D; + } + else if (strncmp( &uc_str[4], "Gfx", 3 ) == 0) + { + current->NoN = (strncmp( &uc_str[20], ".NoN", 4 ) == 0); + + if (strncmp( &uc_str[14], "F3D", 3 ) == 0) + { + if (uc_str[28] == '1') + type = F3DEX; + else if (uc_str[31] == '2') + type = F3DEX2; + } + else if (strncmp( &uc_str[14], "L3D", 3 ) == 0) + { + if (uc_str[28] == '1') + type = L3DEX; + else if (uc_str[31] == '2') + type = L3DEX2; + } + else if (strncmp( &uc_str[14], "S2D", 3 ) == 0) + { + if (uc_str[28] == '1') + type = S2DEX; + else if (uc_str[31] == '2') + type = S2DEX2; + } + } + + LOG(LOG_VERBOSE, "UCODE STRING=%s\n", uc_str); + + if (type != NONE) + { + current->type = type; + return current; + } + + break; /* only the first "RSP" signature is examined */ + } + } + + + for (u32 i = 0; i < sizeof( specialMicrocodes ) / sizeof( SpecialMicrocodeInfo ); i++) + { + if (strcmp( uc_str, specialMicrocodes[i].text ) == 0) + { + current->type = specialMicrocodes[i].type; + return current; + } + } + + // Let the user choose the microcode + LOG(LOG_ERROR, 
"[gles2n64]: Warning - unknown ucode!!!\n"); + if(last_good_ucode != (u32)-1) + { + current->type=last_good_ucode; + } + else + { + current->type = MicrocodeDialog(); + } + return current; +} + +void GBI_MakeCurrent( MicrocodeInfo *current ) +{ + if (current != GBI.top) + { + if (current == GBI.bottom) + { + GBI.bottom = current->higher; + GBI.bottom->lower = NULL; + } + else + { + current->higher->lower = current->lower; + current->lower->higher = current->higher; + } + + current->higher = NULL; + current->lower = GBI.top; + GBI.top->higher = current; + GBI.top = current; + } + + if (!GBI.current || (GBI.current->type != current->type)) + { + + for (int i = 0; i <= 0xFF; i++) + GBI.cmd[i] = GBI_Unknown; + + RDP_Init(); + switch (current->type) + { + case F3D: F3D_Init(); break; + case F3DEX: F3DEX_Init(); break; + case F3DEX2: F3DEX2_Init(); break; + case L3D: L3D_Init(); break; + case L3DEX: L3DEX_Init(); break; + case L3DEX2: L3DEX2_Init(); break; + case S2DEX: S2DEX_Init(); break; + case S2DEX2: S2DEX2_Init(); break; + case F3DDKR: F3DDKR_Init(); break; + case F3DWRUS: F3DWRUS_Init(); break; + case F3DPD: F3DPD_Init(); break; + case F3DCBFD: F3DCBFD_Init(); break; + } + } + + + GBI.current = current; +} + diff --git a/source/gles2n64/src/GBI.h b/source/gles2n64/src/GBI.h new file mode 100644 index 0000000..13bd6bb --- /dev/null +++ b/source/gles2n64/src/GBI.h @@ -0,0 +1,820 @@ +#ifndef GBI_H +#define GBI_H +#include "Hash.h" +#include "Types.h" +#include + +// Microcode Types +#define F3D 0 +#define F3DEX 1 +#define F3DEX2 2 +#define L3D 3 +#define L3DEX 4 +#define L3DEX2 5 +#define S2DEX 6 +#define S2DEX2 7 +#define F3DPD 8 +#define F3DDKR 9 +#define F3DWRUS 10 +#define F3DCBFD 11 +#define NONE 12 + +#ifdef MAINDEF +const char *MicrocodeTypes[] = +{ + "Fast3D", + "F3DEX", + "F3DEX2", + "Line3D", + "L3DEX", + "L3DEX2", + "S2DEX", + "S2DEX2", + "Perfect Dark", + "DKR/JFG", + "Waverace US", + "Conker's Bad Fur Day", + "None", +}; +#else +extern const char 
*MicrocodeTypes[]; +#endif + +static const int numMicrocodeTypes = 11; + +// Fixed point conversion factors +#define FIXED2FLOATRECIP1 0.5f +#define FIXED2FLOATRECIP2 0.25f +#define FIXED2FLOATRECIP3 0.125f +#define FIXED2FLOATRECIP4 0.0625f +#define FIXED2FLOATRECIP5 0.03125f +#define FIXED2FLOATRECIP6 0.015625f +#define FIXED2FLOATRECIP7 0.0078125f +#define FIXED2FLOATRECIP8 0.00390625f +#define FIXED2FLOATRECIP9 0.001953125f +#define FIXED2FLOATRECIP10 0.0009765625f +#define FIXED2FLOATRECIP11 0.00048828125f +#define FIXED2FLOATRECIP12 0.00024414063f +#define FIXED2FLOATRECIP13 0.00012207031f +#define FIXED2FLOATRECIP14 6.1035156e-05f +#define FIXED2FLOATRECIP15 3.0517578e-05f +#define FIXED2FLOATRECIP16 1.5258789e-05f + +#define _FIXED2FLOAT( v, b ) \ + ((f32)(v) * FIXED2FLOATRECIP##b) /* converts v to f32 and scales it by 2^-b; v is parenthesised so the cast covers a whole compound argument */ + +// Useful macros for decoding GBI command's parameters +#define _SHIFTL( v, s, w ) \ + (((u32)(v) & ((0x01 << (w)) - 1)) << (s)) /* keeps the low w bits of v, then shifts them left by s; w must stay below the width of int */ +#define _SHIFTR( v, s, w ) \ + (((u32)(v) >> (s)) & ((0x01 << (w)) - 1)) /* extracts the w-bit field of v that starts at bit s; w must stay below the width of int */ + +// BG flags +#define G_BGLT_LOADBLOCK 0x0033 +#define G_BGLT_LOADTILE 0xfff4 + +#define G_BG_FLAG_FLIPS 0x01 +#define G_BG_FLAG_FLIPT 0x10 + +// Sprite object render modes +#define G_OBJRM_NOTXCLAMP 0x01 +#define G_OBJRM_XLU 0x02 /* Ignored */ +#define G_OBJRM_ANTIALIAS 0x04 /* Ignored */ +#define G_OBJRM_BILERP 0x08 +#define G_OBJRM_SHRINKSIZE_1 0x10 +#define G_OBJRM_SHRINKSIZE_2 0x20 +#define G_OBJRM_WIDEN 0x40 + +// Sprite texture loading types +#define G_OBJLT_TXTRBLOCK 0x00001033 +#define G_OBJLT_TXTRTILE 0x00fc1034 +#define G_OBJLT_TLUT 0x00000030 + + +// These are all the constant flags +#define G_ZBUFFER 0x00000001 +#define G_SHADE 0x00000004 +#define G_FOG 0x00010000 +#define G_LIGHTING 0x00020000 +#define G_TEXTURE_GEN 0x00040000 +#define G_TEXTURE_GEN_LINEAR 0x00080000 +#define G_LOD 0x00100000 + +#define G_MV_MMTX 2 +#define G_MV_PMTX 6 +#define G_MV_LIGHT 10 +#define G_MV_POINT 12 +#define G_MV_MATRIX 14 + +#define G_MVO_LOOKATX 0 +#define G_MVO_LOOKATY 24 +#define G_MVO_L0 
48 +#define G_MVO_L1 72 +#define G_MVO_L2 96 +#define G_MVO_L3 120 +#define G_MVO_L4 144 +#define G_MVO_L5 168 +#define G_MVO_L6 192 +#define G_MVO_L7 216 + +#define G_MV_LOOKATY 0x82 +#define G_MV_LOOKATX 0x84 +#define G_MV_L0 0x86 +#define G_MV_L1 0x88 +#define G_MV_L2 0x8a +#define G_MV_L3 0x8c +#define G_MV_L4 0x8e +#define G_MV_L5 0x90 +#define G_MV_L6 0x92 +#define G_MV_L7 0x94 +#define G_MV_TXTATT 0x96 +#define G_MV_MATRIX_1 0x9E +#define G_MV_MATRIX_2 0x98 +#define G_MV_MATRIX_3 0x9A +#define G_MV_MATRIX_4 0x9C + +#define G_MW_MATRIX 0x00 +#define G_MW_NUMLIGHT 0x02 +#define G_MW_CLIP 0x04 +#define G_MW_SEGMENT 0x06 +#define G_MW_FOG 0x08 +#define G_MW_LIGHTCOL 0x0A +#define G_MW_FORCEMTX 0x0C +#define G_MW_POINTS 0x0C +#define G_MW_PERSPNORM 0x0E +#define G_MV_COORDMOD 0x10 //Conker Bad Fur Day + +#define G_MWO_NUMLIGHT 0x00 +#define G_MWO_CLIP_RNX 0x04 +#define G_MWO_CLIP_RNY 0x0c +#define G_MWO_CLIP_RPX 0x14 +#define G_MWO_CLIP_RPY 0x1c +#define G_MWO_SEGMENT_0 0x00 +#define G_MWO_SEGMENT_1 0x01 +#define G_MWO_SEGMENT_2 0x02 +#define G_MWO_SEGMENT_3 0x03 +#define G_MWO_SEGMENT_4 0x04 +#define G_MWO_SEGMENT_5 0x05 +#define G_MWO_SEGMENT_6 0x06 +#define G_MWO_SEGMENT_7 0x07 +#define G_MWO_SEGMENT_8 0x08 +#define G_MWO_SEGMENT_9 0x09 +#define G_MWO_SEGMENT_A 0x0a +#define G_MWO_SEGMENT_B 0x0b +#define G_MWO_SEGMENT_C 0x0c +#define G_MWO_SEGMENT_D 0x0d +#define G_MWO_SEGMENT_E 0x0e +#define G_MWO_SEGMENT_F 0x0f +#define G_MWO_FOG 0x00 + +#define G_MWO_MATRIX_XX_XY_I 0x00 +#define G_MWO_MATRIX_XZ_XW_I 0x04 +#define G_MWO_MATRIX_YX_YY_I 0x08 +#define G_MWO_MATRIX_YZ_YW_I 0x0C +#define G_MWO_MATRIX_ZX_ZY_I 0x10 +#define G_MWO_MATRIX_ZZ_ZW_I 0x14 +#define G_MWO_MATRIX_WX_WY_I 0x18 +#define G_MWO_MATRIX_WZ_WW_I 0x1C +#define G_MWO_MATRIX_XX_XY_F 0x20 +#define G_MWO_MATRIX_XZ_XW_F 0x24 +#define G_MWO_MATRIX_YX_YY_F 0x28 +#define G_MWO_MATRIX_YZ_YW_F 0x2C +#define G_MWO_MATRIX_ZX_ZY_F 0x30 +#define G_MWO_MATRIX_ZZ_ZW_F 0x34 +#define G_MWO_MATRIX_WX_WY_F 0x38 
+#define G_MWO_MATRIX_WZ_WW_F 0x3C +#define G_MWO_POINT_RGBA 0x10 +#define G_MWO_POINT_ST 0x14 +#define G_MWO_POINT_XYSCREEN 0x18 +#define G_MWO_POINT_ZSCREEN 0x1C + +#ifdef DEBUG +static const char *MWOPointText[] = +{ + "G_MWO_POINT_RGBA", + "G_MWO_POINT_ST", + "G_MWO_POINT_XYSCREEN", + "G_MWO_POINT_ZSCREEN" +}; + +static const char *MWOMatrixText[] = +{ + "G_MWO_MATRIX_XX_XY_I", "G_MWO_MATRIX_XZ_XW_I", "G_MWO_MATRIX_YX_YY_I", "G_MWO_MATRIX_YZ_YW_I", + "G_MWO_MATRIX_ZX_ZY_I", "G_MWO_MATRIX_ZZ_ZW_I", "G_MWO_MATRIX_WX_WY_I", "G_MWO_MATRIX_WZ_WW_I", + "G_MWO_MATRIX_XX_XY_F", "G_MWO_MATRIX_XZ_XW_F", "G_MWO_MATRIX_YX_YY_F", "G_MWO_MATRIX_YZ_YW_F", + "G_MWO_MATRIX_ZX_ZY_F", "G_MWO_MATRIX_ZZ_ZW_F", "G_MWO_MATRIX_WX_WY_F", "G_MWO_MATRIX_WZ_WW_F" +}; +#endif + +// These flags change between ucodes +extern u32 G_MTX_STACKSIZE; + +extern u32 G_MTX_MODELVIEW; +extern u32 G_MTX_PROJECTION; +extern u32 G_MTX_MUL; +extern u32 G_MTX_LOAD; +extern u32 G_MTX_NOPUSH; +extern u32 G_MTX_PUSH; + +extern u32 G_TEXTURE_ENABLE; +extern u32 G_SHADING_SMOOTH; +extern u32 G_CULL_FRONT; +extern u32 G_CULL_BACK; +extern u32 G_CULL_BOTH; +extern u32 G_CLIPPING; + +extern u32 G_MV_VIEWPORT; + +extern u32 G_MWO_aLIGHT_1, G_MWO_bLIGHT_1; +extern u32 G_MWO_aLIGHT_2, G_MWO_bLIGHT_2; +extern u32 G_MWO_aLIGHT_3, G_MWO_bLIGHT_3; +extern u32 G_MWO_aLIGHT_4, G_MWO_bLIGHT_4; +extern u32 G_MWO_aLIGHT_5, G_MWO_bLIGHT_5; +extern u32 G_MWO_aLIGHT_6, G_MWO_bLIGHT_6; +extern u32 G_MWO_aLIGHT_7, G_MWO_bLIGHT_7; +extern u32 G_MWO_aLIGHT_8, G_MWO_bLIGHT_8; + +// Image formats +#define G_IM_FMT_RGBA 0 +#define G_IM_FMT_YUV 1 +#define G_IM_FMT_CI 2 +#define G_IM_FMT_IA 3 +#define G_IM_FMT_I 4 +#define G_IM_FMT_CI_IA 5 //not real + +// Image sizes +#define G_IM_SIZ_4b 0 +#define G_IM_SIZ_8b 1 +#define G_IM_SIZ_16b 2 +#define G_IM_SIZ_32b 3 +#define G_IM_SIZ_DD 5 + +#define G_TX_MIRROR 0x1 +#define G_TX_CLAMP 0x2 + +#ifdef DEBUG +static const char *ImageFormatText[] = +{ + "G_IM_FMT_RGBA", + "G_IM_FMT_YUV", + 
"G_IM_FMT_CI", + "G_IM_FMT_IA", + "G_IM_FMT_I", + "G_IM_FMT_INVALID", + "G_IM_FMT_INVALID", + "G_IM_FMT_INVALID" +}; + +static const char *ImageSizeText[] = +{ + "G_IM_SIZ_4b", + "G_IM_SIZ_8b", + "G_IM_SIZ_16b", + "G_IM_SIZ_32b" +}; + +static const char *SegmentText[] = +{ + "G_MWO_SEGMENT_0", "G_MWO_SEGMENT_1", "G_MWO_SEGMENT_2", "G_MWO_SEGMENT_3", + "G_MWO_SEGMENT_4", "G_MWO_SEGMENT_5", "G_MWO_SEGMENT_6", "G_MWO_SEGMENT_7", + "G_MWO_SEGMENT_8", "G_MWO_SEGMENT_9", "G_MWO_SEGMENT_A", "G_MWO_SEGMENT_B", + "G_MWO_SEGMENT_C", "G_MWO_SEGMENT_D", "G_MWO_SEGMENT_E", "G_MWO_SEGMENT_F" +}; +#endif + +#define G_NOOP 0x00 + +#define G_IMMFIRST -65 + +// These GBI commands are common to all ucodes +#define G_SETCIMG 0xFF /* -1 */ +#define G_SETZIMG 0xFE /* -2 */ +#define G_SETTIMG 0xFD /* -3 */ +#define G_SETCOMBINE 0xFC /* -4 */ +#define G_SETENVCOLOR 0xFB /* -5 */ +#define G_SETPRIMCOLOR 0xFA /* -6 */ +#define G_SETBLENDCOLOR 0xF9 /* -7 */ +#define G_SETFOGCOLOR 0xF8 /* -8 */ +#define G_SETFILLCOLOR 0xF7 /* -9 */ +#define G_FILLRECT 0xF6 /* -10 */ +#define G_SETTILE 0xF5 /* -11 */ +#define G_LOADTILE 0xF4 /* -12 */ +#define G_LOADBLOCK 0xF3 /* -13 */ +#define G_SETTILESIZE 0xF2 /* -14 */ +#define G_LOADTLUT 0xF0 /* -16 */ +#define G_RDPSETOTHERMODE 0xEF /* -17 */ +#define G_SETPRIMDEPTH 0xEE /* -18 */ +#define G_SETSCISSOR 0xED /* -19 */ +#define G_SETCONVERT 0xEC /* -20 */ +#define G_SETKEYR 0xEB /* -21 */ +#define G_SETKEYGB 0xEA /* -22 */ +#define G_RDPFULLSYNC 0xE9 /* -23 */ +#define G_RDPTILESYNC 0xE8 /* -24 */ +#define G_RDPPIPESYNC 0xE7 /* -25 */ +#define G_RDPLOADSYNC 0xE6 /* -26 */ +#define G_TEXRECTFLIP 0xE5 /* -27 */ +#define G_TEXRECT 0xE4 /* -28 */ + +#define G_RDPNOOP 0xC0 + +#define G_TRI_FILL 0xC8 /* fill triangle: 11001000 */ +#define G_TRI_FILL_ZBUFF 0xC9 /* fill, zbuff triangle: 11001001 */ +#define G_TRI_TXTR 0xCA /* texture triangle: 11001010 */ +#define G_TRI_TXTR_ZBUFF 0xCB /* texture, zbuff triangle: 11001011 */ +#define G_TRI_SHADE 0xCC /* shade 
triangle: 11001100 */ +#define G_TRI_SHADE_ZBUFF 0xCD /* shade, zbuff triangle: 11001101 */ +#define G_TRI_SHADE_TXTR 0xCE /* shade, texture triangle: 11001110 */ +#define G_TRI_SHADE_TXTR_ZBUFF 0xCF /* shade, txtr, zbuff trngl: 11001111 */ + +/* + * G_SETOTHERMODE_L sft: shift count + */ +#define G_MDSFT_ALPHACOMPARE 0 +#define G_MDSFT_ZSRCSEL 2 +#define G_MDSFT_RENDERMODE 3 +#define G_MDSFT_BLENDER 16 + +/* + * G_SETOTHERMODE_H sft: shift count + */ +#define G_MDSFT_BLENDMASK 0 /* unsupported */ +#define G_MDSFT_ALPHADITHER 4 +#define G_MDSFT_RGBDITHER 6 + +#define G_MDSFT_COMBKEY 8 +#define G_MDSFT_TEXTCONV 9 +#define G_MDSFT_TEXTFILT 12 +#define G_MDSFT_TEXTLUT 14 +#define G_MDSFT_TEXTLOD 16 +#define G_MDSFT_TEXTDETAIL 17 +#define G_MDSFT_TEXTPERSP 19 +#define G_MDSFT_CYCLETYPE 20 +#define G_MDSFT_COLORDITHER 22 /* unsupported in HW 2.0 */ +#define G_MDSFT_PIPELINE 23 + +/* G_SETOTHERMODE_H gPipelineMode */ +#define G_PM_1PRIMITIVE 1 +#define G_PM_NPRIMITIVE 0 + +/* G_SETOTHERMODE_H gSetCycleType */ +#define G_CYC_1CYCLE 0 +#define G_CYC_2CYCLE 1 +#define G_CYC_COPY 2 +#define G_CYC_FILL 3 + +/* G_SETOTHERMODE_H gSetTexturePersp */ +#define G_TP_NONE 0 +#define G_TP_PERSP 1 + +/* G_SETOTHERMODE_H gSetTextureDetail */ +#define G_TD_CLAMP 0 +#define G_TD_SHARPEN 1 +#define G_TD_DETAIL 2 + +/* G_SETOTHERMODE_H gSetTextureLOD */ +#define G_TL_TILE 0 +#define G_TL_LOD 1 + +/* G_SETOTHERMODE_H gSetTextureLUT */ +#define G_TT_NONE 0 +#define G_TT_RGBA16 2 +#define G_TT_IA16 3 + +/* G_SETOTHERMODE_H gSetTextureFilter */ +#define G_TF_POINT 0 +#define G_TF_AVERAGE 3 +#define G_TF_BILERP 2 + +/* G_SETOTHERMODE_H gSetTextureConvert */ +#define G_TC_CONV 0 +#define G_TC_FILTCONV 5 +#define G_TC_FILT 6 + +/* G_SETOTHERMODE_H gSetCombineKey */ +#define G_CK_NONE 0 +#define G_CK_KEY 1 + +/* G_SETOTHERMODE_H gSetColorDither */ +#define G_CD_MAGICSQ 0 +#define G_CD_BAYER 1 +#define G_CD_NOISE 2 + +#define G_CD_DISABLE 3 +#define G_CD_ENABLE G_CD_NOISE /* HW 1.0 compatibility 
mode */ + +/* G_SETOTHERMODE_H gSetAlphaDither */ +#define G_AD_PATTERN 0 +#define G_AD_NOTPATTERN 1 +#define G_AD_NOISE 2 +#define G_AD_DISABLE 3 + +/* G_SETOTHERMODE_L gSetAlphaCompare */ +#define G_AC_NONE 0 +#define G_AC_THRESHOLD 1 +#define G_AC_DITHER 3 + +/* G_SETOTHERMODE_L gSetDepthSource */ +#define G_ZS_PIXEL 0 +#define G_ZS_PRIM 1 + +/* G_SETOTHERMODE_L gSetRenderMode */ +#define AA_EN 1 +#define Z_CMP 1 +#define Z_UPD 1 +#define IM_RD 1 +#define CLR_ON_CVG 1 +#define CVG_DST_CLAMP 0 +#define CVG_DST_WRAP 1 +#define CVG_DST_FULL 2 +#define CVG_DST_SAVE 3 +#define ZMODE_OPA 0 +#define ZMODE_INTER 1 +#define ZMODE_XLU 2 +#define ZMODE_DEC 3 +#define CVG_X_ALPHA 1 +#define ALPHA_CVG_SEL 1 +#define FORCE_BL 1 +#define TEX_EDGE 0 // not used + +#define G_SC_NON_INTERLACE 0 +#define G_SC_EVEN_INTERLACE 2 +#define G_SC_ODD_INTERLACE 3 + +#ifdef DEBUG +static const char *AAEnableText = "AA_EN"; +static const char *DepthCompareText = "Z_CMP"; +static const char *DepthUpdateText = "Z_UPD"; +static const char *ClearOnCvgText = "CLR_ON_CVG"; +static const char *CvgXAlphaText = "CVG_X_ALPHA"; +static const char *AlphaCvgSelText = "ALPHA_CVG_SEL"; +static const char *ForceBlenderText = "FORCE_BL"; + +static const char *AlphaCompareText[] = +{ + "G_AC_NONE", "G_AC_THRESHOLD", "G_AC_INVALID", "G_AC_DITHER" +}; + +static const char *DepthSourceText[] = +{ + "G_ZS_PIXEL", "G_ZS_PRIM" +}; + +static const char *AlphaDitherText[] = +{ + "G_AD_PATTERN", "G_AD_NOTPATTERN", "G_AD_NOISE", "G_AD_DISABLE" +}; + +static const char *ColorDitherText[] = +{ + "G_CD_MAGICSQ", "G_CD_BAYER", "G_CD_NOISE", "G_CD_DISABLE" +}; + +static const char *CombineKeyText[] = +{ + "G_CK_NONE", "G_CK_KEY" +}; + +static const char *TextureConvertText[] = +{ + "G_TC_CONV", "G_TC_INVALID", "G_TC_INVALID", "G_TC_INVALID", "G_TC_INVALID", "G_TC_FILTCONV", "G_TC_FILT", "G_TC_INVALID" +}; + +static const char *TextureFilterText[] = +{ + "G_TF_POINT", "G_TF_INVALID", "G_TF_BILERP", "G_TF_AVERAGE" +}; + 
+static const char *TextureLUTText[] = /* DEBUG-only display names indexed by the G_TT_* value; slot 1 has no matching G_TT_* constant */ +{ + "G_TT_NONE", "G_TT_INVALID", "G_TT_RGBA16", "G_TT_IA16" +}; + +static const char *TextureLODText[] = +{ + "G_TL_TILE", "G_TL_LOD" +}; + +static const char *TextureDetailText[] = +{ + "G_TD_CLAMP", "G_TD_SHARPEN", "G_TD_DETAIL" +}; + +static const char *TexturePerspText[] = +{ + "G_TP_NONE", "G_TP_PERSP" +}; + +static const char *CycleTypeText[] = +{ + "G_CYC_1CYCLE", "G_CYC_2CYCLE", "G_CYC_COPY", "G_CYC_FILL" +}; + +static const char *PipelineModeText[] = +{ + "G_PM_NPRIMITIVE", "G_PM_1PRIMITIVE" +}; + +static const char *CvgDestText[] = +{ + "CVG_DST_CLAMP", "CVG_DST_WRAP", "CVG_DST_FULL", "CVG_DST_SAVE" +}; + +static const char *DepthModeText[] = +{ + "ZMODE_OPA", "ZMODE_INTER", "ZMODE_XLU", "ZMODE_DEC" +}; +/* Indexed by the G_SC_* value; slot 1 has no matching G_SC_* constant */ +static const char *ScissorModeText[] = +{ + "G_SC_NON_INTERLACE", "G_SC_INVALID", "G_SC_EVEN_INTERLACE", "G_SC_ODD_INTERLACE" +}; +#endif +/* Several CCMUX names share one value (CENTER, SCALE and 1 are all 6; COMBINED_ALPHA, NOISE and K4 are all 7); which name applies presumably depends on the combiner input being decoded - confirm against the combiner code */ +/* Color combiner constants: */ +#define G_CCMUX_COMBINED 0 +#define G_CCMUX_TEXEL0 1 +#define G_CCMUX_TEXEL1 2 +#define G_CCMUX_PRIMITIVE 3 +#define G_CCMUX_SHADE 4 +#define G_CCMUX_ENVIRONMENT 5 +#define G_CCMUX_CENTER 6 +#define G_CCMUX_SCALE 6 +#define G_CCMUX_COMBINED_ALPHA 7 +#define G_CCMUX_TEXEL0_ALPHA 8 +#define G_CCMUX_TEXEL1_ALPHA 9 +#define G_CCMUX_PRIMITIVE_ALPHA 10 +#define G_CCMUX_SHADE_ALPHA 11 +#define G_CCMUX_ENV_ALPHA 12 +#define G_CCMUX_LOD_FRACTION 13 +#define G_CCMUX_PRIM_LOD_FRAC 14 +#define G_CCMUX_NOISE 7 +#define G_CCMUX_K4 7 +#define G_CCMUX_K5 15 +#define G_CCMUX_1 6 +#define G_CCMUX_0 31 +/* In the alpha set LOD_FRACTION shares value 0 with COMBINED, and PRIM_LOD_FRAC shares value 6 with 1 */ +/* Alpha combiner constants: */ +#define G_ACMUX_COMBINED 0 +#define G_ACMUX_TEXEL0 1 +#define G_ACMUX_TEXEL1 2 +#define G_ACMUX_PRIMITIVE 3 +#define G_ACMUX_SHADE 4 +#define G_ACMUX_ENVIRONMENT 5 +#define G_ACMUX_LOD_FRACTION 0 +#define G_ACMUX_PRIM_LOD_FRAC 6 +#define G_ACMUX_1 6 +#define G_ACMUX_0 7 +/* DEBUG-only display names for combiner selector values; the sa/sb/m/a prefixes presumably denote the four combiner inputs - confirm */ +#ifdef DEBUG +static const char *saRGBText[] = +{ + "COMBINED", "TEXEL0", "TEXEL1", "PRIMITIVE", + "SHADE", "ENVIRONMENT", "NOISE", "1", + "0", "0", "0", "0",
+ "0", "0", "0", "0" +}; + +static const char *sbRGBText[] = +{ + "COMBINED", "TEXEL0", "TEXEL1", "PRIMITIVE", + "SHADE", "ENVIRONMENT", "CENTER", "K4", + "0", "0", "0", "0", + "0", "0", "0", "0" +}; + +static const char *mRGBText[] = +{ + "COMBINED", "TEXEL0", "TEXEL1", "PRIMITIVE", + "SHADE", "ENVIRONMENT", "SCALE", "COMBINED_ALPHA", + "TEXEL0_ALPHA", "TEXEL1_ALPHA", "PRIMITIVE_ALPHA", "SHADE_ALPHA", + "ENV_ALPHA", "LOD_FRACTION", "PRIM_LOD_FRAC", "K5", + "0", "0", "0", "0", + "0", "0", "0", "0", + "0", "0", "0", "0", + "0", "0", "0", "0" +}; + +static const char *aRGBText[] = +{ + "COMBINED", "TEXEL0", "TEXEL1", "PRIMITIVE", + "SHADE", "ENVIRONMENT", "1", "0", +}; + +static const char *saAText[] = +{ + "COMBINED", "TEXEL0", "TEXEL1", "PRIMITIVE", + "SHADE", "ENVIRONMENT", "1", "0", +}; + +static const char *sbAText[] = +{ + "COMBINED", "TEXEL0", "TEXEL1", "PRIMITIVE", + "SHADE", "ENVIRONMENT", "1", "0", +}; + +static const char *mAText[] = +{ + "LOD_FRACTION", "TEXEL0", "TEXEL1", "PRIMITIVE", + "SHADE", "ENVIRONMENT", "PRIM_LOD_FRAC", "0", +}; + +static const char *aAText[] = +{ + "COMBINED", "TEXEL0", "TEXEL1", "PRIMITIVE", + "SHADE", "ENVIRONMENT", "1", "0", +}; +#endif +/* Command ids that are set per microcode: the ucode *_Init routines assign them at run time through GBI_SetGBI */ +extern u32 G_RDPHALF_1, G_RDPHALF_2, G_RDPHALF_CONT; +extern u32 G_SPNOOP; +extern u32 G_SETOTHERMODE_H, G_SETOTHERMODE_L; +extern u32 G_DL, G_ENDDL, G_CULLDL, G_BRANCH_Z; +extern u32 G_LOAD_UCODE; +extern u32 G_MOVEMEM, G_MOVEWORD; +extern u32 G_MTX, G_POPMTX; +extern u32 G_GEOMETRYMODE, G_SETGEOMETRYMODE, G_CLEARGEOMETRYMODE; +extern u32 G_TEXTURE; +extern u32 G_DMA_IO, G_DMA_DL, G_DMA_TRI, G_DMA_MTX, G_DMA_VTX, G_DMA_OFFSETS; +extern u32 G_SPECIAL_1, G_SPECIAL_2, G_SPECIAL_3; +extern u32 G_VTX, G_MODIFYVTX, G_VTXCOLORBASE; +extern u32 G_TRI1, G_TRI2, G_TRI4; +extern u32 G_QUAD, G_LINE3D; +extern u32 G_RESERVED0, G_RESERVED1, G_RESERVED2, G_RESERVED3; +extern u32 G_SPRITE2D_BASE; +extern u32 G_BG_1CYC, G_BG_COPY; +extern u32 G_OBJ_RECTANGLE, G_OBJ_SPRITE, G_OBJ_MOVEMEM; +extern u32
G_SELECT_DL, G_OBJ_RENDERMODE, G_OBJ_RECTANGLE_R; +extern u32 G_OBJ_LOADTXTR, G_OBJ_LDTX_SPRITE, G_OBJ_LDTX_RECT, G_OBJ_LDTX_RECT_R; +extern u32 G_RDPHALF_0, G_TRI_UNKNOWN; + +#define LIGHT_1 1 +#define LIGHT_2 2 +#define LIGHT_3 3 +#define LIGHT_4 4 +#define LIGHT_5 5 +#define LIGHT_6 6 +#define LIGHT_7 7 +#define LIGHT_8 8 + +#define G_DL_PUSH 0x00 +#define G_DL_NOPUSH 0x01 +/* Vertex record; the trailing anonymous union exposes the same four bytes either as an a/b/g/r colour or as a signed x/y/z normal plus alpha. The reversed field order (y before x, t before s) presumably mirrors byte-swapped RDRAM words - confirm */ +typedef struct +{ + s16 y; + s16 x; + + u16 flag; + s16 z; + + s16 t; + s16 s; + + union { + struct + { + u8 a; + u8 b; + u8 g; + u8 r; + } color; + struct + { + s8 a; + s8 z; // b + s8 y; //g + s8 x; //r + } normal; + }; +} Vertex; + +typedef struct +{ + s16 y, x; + u16 ci; + s16 z; + s16 t, s; +} PDVertex; + + +typedef struct +{ + u8 v2, v1, v0, flag; + s16 t0, s0; + s16 t1, s1; + s16 t2, s2; +} DKRTriangle; + +struct Light +{ + u8 pad0, b, g, r; + u8 pad1, b2, g2, r2; + s8 pad2, z, y, x; +}; + +struct LightMM +{ + u8 pad0, b, g, r; + u8 pad1, b2, g2, r2; + s16 y, x, range, z; +}; + +/* Handler signature for one GBI command: receives the command's two u32 words w0 and w1 */ +// GBI commands +typedef void (*GBIFunc)( u32 w0, u32 w1 ); +//extern GBIFunc GBICmd[256]; + +struct SpecialMicrocodeInfo +{ + u32 type; + u32 NoN; + u32 crc; + const char *text; +}; +/* One detected microcode. higher/lower link the entries into a list running from GBI.top down to GBI.bottom; GBI_MakeCurrent moves the selected entry to the top */ +struct MicrocodeInfo +{ + u32 address, dataAddress; + u16 dataSize; + u32 type; + u32 NoN; + u32 crc; + u32 *text; + + MicrocodeInfo *higher, *lower; +}; +/* Global GBI state: cmd[] holds one handler per possible command byte; GBI_MakeCurrent resets every slot to GBI_Unknown before the selected microcode's *_Init installs its own handlers */ +struct GBIInfo +{ + GBIFunc cmd[256]; + + u32 PCStackSize, numMicrocodes; + MicrocodeInfo *current, *top, *bottom; + +#ifdef PROFILE_GBI + unsigned int profileTimer[256 * 12]; + unsigned int profileNum[256 * 12]; + unsigned int profileTmp; +#endif +}; + +extern GBIInfo GBI; + +#ifdef PROFILE_GBI +void GBI_ProfileReset(); +void GBI_ProfileInit(); +void GBI_ProfileBegin(u32 cmd); +void GBI_ProfileEnd(u32 cmd); +u32 GBI_ProfilePrint(FILE *file); +const char* GBI_GetFuncName(u32 ucode, u32 cmd); +u32 GBI_GetFuncTime(u32 ucode, u32 cmd); +#endif + +void GBI_MakeCurrent( MicrocodeInfo *current ); +MicrocodeInfo *GBI_DetectMicrocode( u32 uc_start, u32 uc_dstart, u16 uc_dsize );
+extern u32 last_good_ucode; +void GBI_Init(); +void GBI_Destroy(); + +// Allows easier setting of GBI commands +#define GBI_SetGBI( command, value, function ) \ + do { (command) = (value); \ + GBI.cmd[(command)] = (function); } while (0) /* one statement, so it stays intact under an unbraced if/else */ +/* Copies each ucode##_* constant into the matching run-time G_* variable; expands to a plain statement list that already ends in ';' */ +#define GBI_InitFlags( ucode ) \ + G_MTX_STACKSIZE = ucode##_MTX_STACKSIZE; \ + G_MTX_MODELVIEW = ucode##_MTX_MODELVIEW; \ + G_MTX_PROJECTION = ucode##_MTX_PROJECTION; \ + G_MTX_MUL = ucode##_MTX_MUL; \ + G_MTX_LOAD = ucode##_MTX_LOAD; \ + G_MTX_NOPUSH = ucode##_MTX_NOPUSH; \ + G_MTX_PUSH = ucode##_MTX_PUSH; \ +\ + G_TEXTURE_ENABLE = ucode##_TEXTURE_ENABLE; \ + G_SHADING_SMOOTH = ucode##_SHADING_SMOOTH; \ + G_CULL_FRONT = ucode##_CULL_FRONT; \ + G_CULL_BACK = ucode##_CULL_BACK; \ + G_CULL_BOTH = ucode##_CULL_BOTH; \ + G_CLIPPING = ucode##_CLIPPING; \ +\ + G_MV_VIEWPORT = ucode##_MV_VIEWPORT; \ +\ + G_MWO_aLIGHT_1 = ucode##_MWO_aLIGHT_1; \ + G_MWO_bLIGHT_1 = ucode##_MWO_bLIGHT_1; \ + G_MWO_aLIGHT_2 = ucode##_MWO_aLIGHT_2; \ + G_MWO_bLIGHT_2 = ucode##_MWO_bLIGHT_2; \ + G_MWO_aLIGHT_3 = ucode##_MWO_aLIGHT_3; \ + G_MWO_bLIGHT_3 = ucode##_MWO_bLIGHT_3; \ + G_MWO_aLIGHT_4 = ucode##_MWO_aLIGHT_4; \ + G_MWO_bLIGHT_4 = ucode##_MWO_bLIGHT_4; \ + G_MWO_aLIGHT_5 = ucode##_MWO_aLIGHT_5; \ + G_MWO_bLIGHT_5 = ucode##_MWO_bLIGHT_5; \ + G_MWO_aLIGHT_6 = ucode##_MWO_aLIGHT_6; \ + G_MWO_bLIGHT_6 = ucode##_MWO_bLIGHT_6; \ + G_MWO_aLIGHT_7 = ucode##_MWO_aLIGHT_7; \ + G_MWO_bLIGHT_7 = ucode##_MWO_bLIGHT_7; \ + G_MWO_aLIGHT_8 = ucode##_MWO_aLIGHT_8; \ + G_MWO_bLIGHT_8 = ucode##_MWO_bLIGHT_8; + +#endif + diff --git a/source/gles2n64/src/Hash.h b/source/gles2n64/src/Hash.h new file mode 100644 index 0000000..1f26ec9 --- /dev/null +++ b/source/gles2n64/src/Hash.h @@ -0,0 +1,42 @@ +#ifndef __HASH_H__ +#define __HASH_H__ + +#include +/* Fixed-size, direct-mapped pointer table with 2^power2 slots: insert() overwrites whatever occupies slot (hash & _mask) and find() returns that slot's pointer without comparing keys, so callers must validate the result themselves. Entries are not owned; destroy() frees only the slot array. */ +template +class HashMap +{ +public: + void init(unsigned power2) + { + _mask = (1 << power2) - 1; + _hashmap = (T**)malloc((_mask+1) * sizeof(T*)); + reset(); + } + + void destroy() + { + free(_hashmap); _hashmap = 0; /* cleared so a repeated destroy() frees a null pointer instead of double-freeing */ + } + + void reset() + { + memset(_hashmap,
0, (_mask+1) * sizeof(T*)); + } + + void insert(unsigned hash, T* data) + { + _hashmap[hash & _mask] = data; + } + + T* find(unsigned hash) + { + return _hashmap[hash & _mask]; + } + +protected: + T **_hashmap; + unsigned _mask; +}; + +#endif diff --git a/source/gles2n64/src/L3D.cpp b/source/gles2n64/src/L3D.cpp new file mode 100644 index 0000000..72f9fe3 --- /dev/null +++ b/source/gles2n64/src/L3D.cpp @@ -0,0 +1,57 @@ +#include "gles2N64.h" +#include "Debug.h" +#include "F3D.h" +#include "L3D.h" +#include "N64.h" +#include "RSP.h" +#include "RDP.h" +#include "gSP.h" +#include "gDP.h" +#include "GBI.h" +/* Line3D handler for the L3D microcode: the low byte of w1 is the line width; bytes 2 and 1 of w1 (each divided by 10) and the top byte are forwarded to gSPLine3D when the width is 0, otherwise to gSPLineW3D together with the width. w0 is unused. */ +void L3D_Line3D( u32 w0, u32 w1 ) +{ + u32 wd = _SHIFTR( w1, 0, 8 ); + + if (wd == 0) + gSPLine3D( _SHIFTR( w1, 16, 8 ) / 10, _SHIFTR( w1, 8, 8 ) / 10, _SHIFTR( w1, 24, 8 ) ); + else + gSPLineW3D( _SHIFTR( w1, 16, 8 ) / 10, _SHIFTR( w1, 8, 8 ) / 10, wd, _SHIFTR( w1, 24, 8 ) ); +} +/* Installs the L3D command table: F3D flags and handlers with the line command bound to L3D_Line3D. The TRI1/TRI4 registrations are commented out, so those slots keep whatever handler they held before this call. */ +void L3D_Init() +{ + // Set GeometryMode flags + GBI_InitFlags( F3D ); + + GBI.PCStackSize = 10; + + // GBI Command Command Value Command Function + GBI_SetGBI( G_SPNOOP, F3D_SPNOOP, F3D_SPNoOp ); + GBI_SetGBI( G_MTX, F3D_MTX, F3D_Mtx ); + GBI_SetGBI( G_RESERVED0, F3D_RESERVED0, F3D_Reserved0 ); + GBI_SetGBI( G_MOVEMEM, F3D_MOVEMEM, F3D_MoveMem ); + GBI_SetGBI( G_VTX, F3D_VTX, F3D_Vtx ); + GBI_SetGBI( G_RESERVED1, F3D_RESERVED1, F3D_Reserved1 ); + GBI_SetGBI( G_DL, F3D_DL, F3D_DList ); + GBI_SetGBI( G_RESERVED2, F3D_RESERVED2, F3D_Reserved2 ); + GBI_SetGBI( G_RESERVED3, F3D_RESERVED3, F3D_Reserved3 ); + GBI_SetGBI( G_SPRITE2D_BASE, F3D_SPRITE2D_BASE, F3D_Sprite2D_Base ); + +// GBI_SetGBI( G_TRI1, F3D_TRI1, F3D_Tri1 ); + GBI_SetGBI( G_CULLDL, F3D_CULLDL, F3D_CullDL ); + GBI_SetGBI( G_POPMTX, F3D_POPMTX, F3D_PopMtx ); + GBI_SetGBI( G_MOVEWORD, F3D_MOVEWORD, F3D_MoveWord ); + GBI_SetGBI( G_TEXTURE, F3D_TEXTURE, F3D_Texture ); + GBI_SetGBI( G_SETOTHERMODE_H, F3D_SETOTHERMODE_H, F3D_SetOtherMode_H ); + GBI_SetGBI( G_SETOTHERMODE_L, F3D_SETOTHERMODE_L, F3D_SetOtherMode_L ); + GBI_SetGBI(
G_ENDDL, F3D_ENDDL, F3D_EndDL ); + GBI_SetGBI( G_SETGEOMETRYMODE, F3D_SETGEOMETRYMODE, F3D_SetGeometryMode ); + GBI_SetGBI( G_CLEARGEOMETRYMODE, F3D_CLEARGEOMETRYMODE, F3D_ClearGeometryMode ); + GBI_SetGBI( G_LINE3D, L3D_LINE3D, L3D_Line3D ); + GBI_SetGBI( G_RDPHALF_1, F3D_RDPHALF_1, F3D_RDPHalf_1 ); + GBI_SetGBI( G_RDPHALF_2, F3D_RDPHALF_2, F3D_RDPHalf_2 ); + GBI_SetGBI( G_RDPHALF_CONT, F3D_RDPHALF_CONT, F3D_RDPHalf_Cont ); +// GBI_SetGBI( G_TRI4, F3D_TRI4, F3D_Tri4 ); +} + diff --git a/source/gles2n64/src/L3D.h b/source/gles2n64/src/L3D.h new file mode 100644 index 0000000..87f3b4f --- /dev/null +++ b/source/gles2n64/src/L3D.h @@ -0,0 +1,10 @@ +#ifndef L3D_H +#define L3D_H +#include "Types.h" + +#define L3D_LINE3D 0xB5 + +void L3D_Line3D( u32 w0, u32 w1 ); +void L3D_Init(); +#endif + diff --git a/source/gles2n64/src/L3DEX.cpp b/source/gles2n64/src/L3DEX.cpp new file mode 100644 index 0000000..2774c92 --- /dev/null +++ b/source/gles2n64/src/L3DEX.cpp @@ -0,0 +1,61 @@ +#include "gles2N64.h" +#include "Debug.h" +#include "F3D.h" +#include "F3DEX.h" +#include "L3D.h" +#include "L3DEX.h" +#include "N64.h" +#include "RSP.h" +#include "RDP.h" +#include "gSP.h" +#include "gDP.h" +#include "GBI.h" +/* Line3D handler for the L3DEX microcode: the low byte of w1 is the line width; the 7-bit fields at bits 17 and 9 of w1 are the two vertex arguments and the final argument is always 0. w0 is unused. */ +void L3DEX_Line3D( u32 w0, u32 w1 ) +{ + u32 wd = _SHIFTR( w1, 0, 8 ); + + if (wd == 0) + gSPLine3D( _SHIFTR( w1, 17, 7 ), _SHIFTR( w1, 9, 7 ), 0 ); + else + gSPLineW3D( _SHIFTR( w1, 17, 7 ), _SHIFTR( w1, 9, 7 ), wd, 0 ); +} +/* Installs the L3DEX command table: F3DEX flags, mostly F3D handlers plus the F3DEX vertex/cull/modify/branch/load handlers, with the line command bound to L3DEX_Line3D; TRI1/TRI2 are left unregistered. */ +void L3DEX_Init() +{ + // Set GeometryMode flags + GBI_InitFlags( F3DEX ); + + GBI.PCStackSize = 18; + + // GBI Command Command Value Command Function + GBI_SetGBI( G_SPNOOP, F3D_SPNOOP, F3D_SPNoOp ); + GBI_SetGBI( G_MTX, F3D_MTX, F3D_Mtx ); + GBI_SetGBI( G_RESERVED0, F3D_RESERVED0, F3D_Reserved0 ); + GBI_SetGBI( G_MOVEMEM, F3D_MOVEMEM, F3D_MoveMem ); + GBI_SetGBI( G_VTX, F3D_VTX, F3DEX_Vtx ); + GBI_SetGBI( G_RESERVED1, F3D_RESERVED1, F3D_Reserved1 ); + GBI_SetGBI( G_DL, F3D_DL, F3D_DList ); + GBI_SetGBI( G_RESERVED2, F3D_RESERVED2, F3D_Reserved2 ); +
GBI_SetGBI( G_RESERVED3, F3D_RESERVED3, F3D_Reserved3 ); + GBI_SetGBI( G_SPRITE2D_BASE, F3D_SPRITE2D_BASE, F3D_Sprite2D_Base ); + +// GBI_SetGBI( G_TRI1, F3D_TRI1, F3DEX_Tri1 ); + GBI_SetGBI( G_CULLDL, F3D_CULLDL, F3DEX_CullDL ); + GBI_SetGBI( G_POPMTX, F3D_POPMTX, F3D_PopMtx ); + GBI_SetGBI( G_MOVEWORD, F3D_MOVEWORD, F3D_MoveWord ); + GBI_SetGBI( G_TEXTURE, F3D_TEXTURE, F3D_Texture ); + GBI_SetGBI( G_SETOTHERMODE_H, F3D_SETOTHERMODE_H, F3D_SetOtherMode_H ); + GBI_SetGBI( G_SETOTHERMODE_L, F3D_SETOTHERMODE_L, F3D_SetOtherMode_L ); + GBI_SetGBI( G_ENDDL, F3D_ENDDL, F3D_EndDL ); + GBI_SetGBI( G_SETGEOMETRYMODE, F3D_SETGEOMETRYMODE, F3D_SetGeometryMode ); + GBI_SetGBI( G_CLEARGEOMETRYMODE, F3D_CLEARGEOMETRYMODE, F3D_ClearGeometryMode ); + GBI_SetGBI( G_LINE3D, L3D_LINE3D, L3DEX_Line3D ); + GBI_SetGBI( G_RDPHALF_1, F3D_RDPHALF_1, F3D_RDPHalf_1 ); + GBI_SetGBI( G_RDPHALF_2, F3D_RDPHALF_2, F3D_RDPHalf_2 ); + GBI_SetGBI( G_MODIFYVTX, F3DEX_MODIFYVTX, F3DEX_ModifyVtx ); +// GBI_SetGBI( G_TRI2, F3DEX_TRI2, F3DEX_Tri2 ); + GBI_SetGBI( G_BRANCH_Z, F3DEX_BRANCH_Z, F3DEX_Branch_Z ); + GBI_SetGBI( G_LOAD_UCODE, F3DEX_LOAD_UCODE, F3DEX_Load_uCode ); +} + diff --git a/source/gles2n64/src/L3DEX.h b/source/gles2n64/src/L3DEX.h new file mode 100644 index 0000000..ddcfb41 --- /dev/null +++ b/source/gles2n64/src/L3DEX.h @@ -0,0 +1,8 @@ +#ifndef L3DEX_H +#define L3DEX_H +#include "Types.h" + +void L3DEX_Line3D( u32 w0, u32 w1 ); +void L3DEX_Init(); +#endif + diff --git a/source/gles2n64/src/L3DEX2.cpp b/source/gles2n64/src/L3DEX2.cpp new file mode 100644 index 0000000..4194a8b --- /dev/null +++ b/source/gles2n64/src/L3DEX2.cpp @@ -0,0 +1,61 @@ +#include "gles2N64.h" +#include "Debug.h" +#include "F3D.h" +#include "F3DEX.h" +#include "F3DEX2.h" +#include "L3DEX2.h" +#include "N64.h" +#include "RSP.h" +#include "RDP.h" +#include "gSP.h" +#include "gDP.h" +#include "GBI.h" +/* Line3D handler for the L3DEX2 microcode: same field layout as the L3DEX handler but every field is read from w0; w1 is unused. */ +void L3DEX2_Line3D( u32 w0, u32 w1 ) +{ + u32 wd = _SHIFTR( w0, 0, 8 ); + + if (wd == 0) + gSPLine3D( _SHIFTR( w0,
17, 7 ), _SHIFTR( w0, 9, 7 ), 0 ); + else + gSPLineW3D( _SHIFTR( w0, 17, 7 ), _SHIFTR( w0, 9, 7 ), wd, 0 ); +} +/* Installs the L3DEX2 command table: F3DEX2 flags and command values with the line command bound to L3DEX2_Line3D; the BG_COPY, TRI1, TRI2 and QUAD registrations are commented out. */ +void L3DEX2_Init() +{ + // Set GeometryMode flags + GBI_InitFlags( F3DEX2 ); + + GBI.PCStackSize = 18; + + // GBI Command Command Value Command Function +// GBI_SetGBI( G_BG_COPY, 0x0A, S2DEX_BG_Copy ); + GBI_SetGBI( G_RDPHALF_2, F3DEX2_RDPHALF_2, F3D_RDPHalf_2 ); + GBI_SetGBI( G_SETOTHERMODE_H, F3DEX2_SETOTHERMODE_H, F3DEX2_SetOtherMode_H ); + GBI_SetGBI( G_SETOTHERMODE_L, F3DEX2_SETOTHERMODE_L, F3DEX2_SetOtherMode_L ); + GBI_SetGBI( G_RDPHALF_1, F3DEX2_RDPHALF_1, F3D_RDPHalf_1 ); + GBI_SetGBI( G_SPNOOP, F3DEX2_SPNOOP, F3D_SPNoOp ); + GBI_SetGBI( G_ENDDL, F3DEX2_ENDDL, F3D_EndDL ); + GBI_SetGBI( G_DL, F3DEX2_DL, F3D_DList ); + GBI_SetGBI( G_LOAD_UCODE, F3DEX2_LOAD_UCODE, F3DEX_Load_uCode ); + GBI_SetGBI( G_MOVEMEM, F3DEX2_MOVEMEM, F3DEX2_MoveMem ); + GBI_SetGBI( G_MOVEWORD, F3DEX2_MOVEWORD, F3DEX2_MoveWord ); + GBI_SetGBI( G_MTX, F3DEX2_MTX, F3DEX2_Mtx ); + GBI_SetGBI( G_GEOMETRYMODE, F3DEX2_GEOMETRYMODE, F3DEX2_GeometryMode ); + GBI_SetGBI( G_POPMTX, F3DEX2_POPMTX, F3DEX2_PopMtx ); + GBI_SetGBI( G_TEXTURE, F3DEX2_TEXTURE, F3DEX2_Texture ); + GBI_SetGBI( G_DMA_IO, F3DEX2_DMA_IO, F3DEX2_DMAIO ); + GBI_SetGBI( G_SPECIAL_1, F3DEX2_SPECIAL_1, F3DEX2_Special_1 ); + GBI_SetGBI( G_SPECIAL_2, F3DEX2_SPECIAL_2, F3DEX2_Special_2 ); + GBI_SetGBI( G_SPECIAL_3, F3DEX2_SPECIAL_3, F3DEX2_Special_3 ); + + GBI_SetGBI( G_VTX, F3DEX2_VTX, F3DEX2_Vtx ); + GBI_SetGBI( G_MODIFYVTX, F3DEX2_MODIFYVTX, F3DEX_ModifyVtx ); + GBI_SetGBI( G_CULLDL, F3DEX2_CULLDL, F3DEX_CullDL ); + GBI_SetGBI( G_BRANCH_Z, F3DEX2_BRANCH_Z, F3DEX_Branch_Z ); +// GBI_SetGBI( G_TRI1, F3DEX2_TRI1, F3DEX2_Tri1 ); +// GBI_SetGBI( G_TRI2, F3DEX2_TRI2, F3DEX_Tri2 ); +// GBI_SetGBI( G_QUAD, F3DEX2_QUAD, F3DEX2_Quad ); + GBI_SetGBI( G_LINE3D, L3DEX2_LINE3D, L3DEX2_Line3D ); +} + diff --git a/source/gles2n64/src/L3DEX2.h b/source/gles2n64/src/L3DEX2.h new file mode 100644 index 0000000..dde6952 --- /dev/null
+++ b/source/gles2n64/src/L3DEX2.h @@ -0,0 +1,10 @@ +#ifndef L3DEX2_H +#define L3DEX2_H +#include "Types.h" + +#define L3DEX2_LINE3D 0x08 /* command value that L3DEX2_Init binds to L3DEX2_Line3D */ + +void L3DEX2_Line3D( u32 w0, u32 w1 ); +void L3DEX2_Init(); +#endif + diff --git a/source/gles2n64/src/N64.cpp b/source/gles2n64/src/N64.cpp new file mode 100644 index 0000000..0eb902c --- /dev/null +++ b/source/gles2n64/src/N64.cpp @@ -0,0 +1,11 @@ +#include "N64.h" +#include "Types.h" +/* Definitions of the globals that N64.h declares extern */ +u8 *DMEM; +u8 *IMEM; +u64 TMEM[512]; +u8 *RDRAM; +u32 RDRAMSize; + +N64Regs REG; + diff --git a/source/gles2n64/src/N64.h b/source/gles2n64/src/N64.h new file mode 100644 index 0000000..c56a2c8 --- /dev/null +++ b/source/gles2n64/src/N64.h @@ -0,0 +1,46 @@ +#ifndef N64_H +#define N64_H + +#include "Types.h" + +#define MI_INTR_SP 0x1 // Bit 1: SP intr +#define MI_INTR_DP 0x20 // Bit 5: DP intr +/* Pointers to the 32-bit MI, DPC and VI register words; presumably filled in from the emulator core at plugin start - not visible here, confirm */ +struct N64Regs +{ + u32 *MI_INTR; + + u32 *DPC_START; + u32 *DPC_END; + u32 *DPC_CURRENT; + u32 *DPC_STATUS; + u32 *DPC_CLOCK; + u32 *DPC_BUFBUSY; + u32 *DPC_PIPEBUSY; + u32 *DPC_TMEM; + + u32 *VI_STATUS; + u32 *VI_ORIGIN; + u32 *VI_WIDTH; + u32 *VI_INTR; + u32 *VI_V_CURRENT_LINE; + u32 *VI_TIMING; + u32 *VI_V_SYNC; + u32 *VI_H_SYNC; + u32 *VI_LEAP; + u32 *VI_H_START; + u32 *VI_V_START; + u32 *VI_V_BURST; + u32 *VI_X_SCALE; + u32 *VI_Y_SCALE; +}; + +extern N64Regs REG; +extern u8 *DMEM; +extern u8 *IMEM; +extern u8 *RDRAM; +extern u64 TMEM[512]; +extern u32 RDRAMSize; + +#endif + diff --git a/source/gles2n64/src/OpenGL.cpp b/source/gles2n64/src/OpenGL.cpp new file mode 100755 index 0000000..be71371 --- /dev/null +++ b/source/gles2n64/src/OpenGL.cpp @@ -0,0 +1,1361 @@ + +#include +#include +#include +#include +#include +#include + +//// paulscode, added for SDL linkage: +#ifdef USE_SDL + #include + // TODO: Remove this bandaid for SDL 2.0 compatibility (needed for SDL_SetVideoMode) + #if SDL_VERSION_ATLEAST(2,0,0) + #include "sdl2_compat.h" // Slightly hacked version of core/vidext_sdl2_compat.h + #endif + #include "eglport.h" +#endif +//// + +#include
"Common.h" +#include "gles2N64.h" +#include "OpenGL.h" +#include "Types.h" +#include "N64.h" +#include "gSP.h" +#include "gDP.h" +#include "Textures.h" +#include "ShaderCombiner.h" +#include "VI.h" +#include "RSP.h" +#include "Config.h" +#include "ticks.h" + +#include "FrameSkipper.h" + +//#include "ae_bridge.h" + +//// paulscode, function prototype missing from Yongzh's code +void OGL_UpdateDepthUpdate(); +//// + +#ifdef TEXTURECACHE_TEST +int TextureCacheTime = 0; +#endif + + +#ifdef RENDERSTATE_TEST +int StateChanges = 0; +#endif + +#ifdef SHADER_TEST +int ProgramSwaps = 0; +#endif + +#ifdef BATCH_TEST +int TotalDrawTime = 0; +int TotalTriangles = 0; +int TotalDrawCalls = 0; +#define glDrawElements(A,B,C,D) \ + TotalTriangles += B; TotalDrawCalls++; int t = ticksGetTicks(); glDrawElements(A,B,C,D); TotalDrawTime += (ticksGetTicks() - t); +#define glDrawArrays(A,B,C) \ + TotalTriangles += C; TotalDrawCalls++; int t = ticksGetTicks(); glDrawArrays(A,B,C); TotalDrawTime += (ticksGetTicks() - t); + +#endif + +GLInfo OGL; + +const char _default_vsh[] = " \n\t" \ +"attribute highp vec2 aPosition; \n\t" \ +"attribute highp vec2 aTexCoord; \n\t" \ +"varying mediump vec2 vTexCoord; \n\t" \ +"void main(){ \n\t" \ +"gl_Position = vec4(aPosition.x, aPosition.y, 0.0, 1.0);\n\t" \ +"vTexCoord = aTexCoord; \n\t" \ +"} \n\t"; + +const char _default_fsh[] = " \n\t" \ +"uniform sampler2D uTex; \n\t" \ +"varying mediump vec2 vTexCoord; \n\t" \ +"void main(){ \n\t" \ +"gl_FragColor = texture2D(uTex, vTexCoord); \n\t" \ +"} \n\t"; +/* On ARM_ASM builds rewrites FPSCR as (FPSCR & 0x04086060) | 0x03000000; compiles to an empty function otherwise. */ +void OGL_EnableRunfast() +{ +#ifdef ARM_ASM + static const unsigned int x = 0x04086060; + static const unsigned int y = 0x03000000; + int r; + asm volatile ( + "fmrx %0, fpscr \n\t" //r0 = FPSCR + "and %0, %0, %1 \n\t" //r0 = r0 & 0x04086060 + "orr %0, %0, %2 \n\t" //r0 = r0 | 0x03000000 + "fmxr fpscr, %0 \n\t" //FPSCR = r0 + : "=r"(r) + : "r"(x), "r"(y) + ); +#endif +} +/* Returns 1 when `extension` appears as a whole space-delimited word in glGetString(GL_EXTENSIONS), otherwise 0; names that are empty or contain a space are rejected up front. */ +int OGL_IsExtSupported( const char *extension ) +{ + const GLubyte *extensions =
NULL; + const GLubyte *start; + GLubyte *where, *terminator; + + where = (GLubyte *) strchr(extension, ' '); + if (where || *extension == '\0') + return 0; + + extensions = glGetString(GL_EXTENSIONS); + + if (!extensions) return 0; + + start = extensions; + for (;;) + { + where = (GLubyte *) strstr((const char *) start, extension); + if (!where) + break; + + terminator = where + strlen(extension); + if (where == start || *(where - 1) == ' ') + if (*terminator == ' ' || *terminator == '\0') + return 1; + + start = terminator; + } + + return 0; +} + +extern void _glcompiler_error(GLint shader); +/* Sets the initial GL state (culling, depth test/func/mask, scissor, depth range, polygon offset, viewport) and builds the default textured program from _default_vsh/_default_fsh, leaving that program bound with uTex set to texture unit 0. */ +void OGL_InitStates() +{ + GLint success; + + glEnable( GL_CULL_FACE ); + glEnableVertexAttribArray( SC_POSITION ); + glEnable( GL_DEPTH_TEST ); + glDepthFunc( GL_ALWAYS ); + glDepthMask( GL_FALSE ); + glEnable( GL_SCISSOR_TEST ); + +///// paulscode, fixes missing graphics on Qualcomm, Adreno: + glDepthRangef(0.0f, 1.0f); + + // default values (only seem to work on OMAP!) + glPolygonOffset(0.2f, 0.2f); + + //// paulscode, added for different configurations based on hardware + // (part of the missing shadows and stars bug fix) +/* int hardwareType = Android_JNI_GetHardwareType(); + float f1, f2; + Android_JNI_GetPolygonOffset(hardwareType, 1, &f1, &f2); + glPolygonOffset( f1, f2 ); +*/ //// + +// some other settings that have been tried, which do not work: + //glDepthRangef(1.0f, 0.0f); // reverses depth-order on OMAP3 chipsets + //glPolygonOffset(-0.2f, -0.2f); + //glDepthRangef( 0.09f, (float)0x7FFF ); // should work, but not on Adreno + //glPolygonOffset( -0.2f, 0.2f ); + //glDepthRangef(0.0f, (float)0x7FFF); // what Yongzh used, broken on Adreno + //glPolygonOffset(0.2f, 0.2f); +///// + + + glViewport(config.framebuffer.xpos, config.framebuffer.ypos, config.framebuffer.width, config.framebuffer.height); + + //create default shader program + LOG( LOG_VERBOSE, "Generate Default Shader Program.\n" ); + + const char *src[1]; + src[0] = _default_fsh; + OGL.defaultFragShader =
glCreateShader( GL_FRAGMENT_SHADER ); + glShaderSource( OGL.defaultFragShader, 1, (const char**) src, NULL ); + glCompileShader( OGL.defaultFragShader ); + glGetShaderiv( OGL.defaultFragShader, GL_COMPILE_STATUS, &success ); + if (!success) + { + LOG(LOG_ERROR, "Failed to produce default fragment shader.\n"); _glcompiler_error( OGL.defaultFragShader ); /* same diagnostic call the vertex-shader path makes */ + } + + src[0] = _default_vsh; + OGL.defaultVertShader = glCreateShader( GL_VERTEX_SHADER ); + glShaderSource( OGL.defaultVertShader, 1, (const char**) src, NULL ); + glCompileShader( OGL.defaultVertShader ); + glGetShaderiv( OGL.defaultVertShader, GL_COMPILE_STATUS, &success ); + if( !success ) + { + LOG( LOG_ERROR, "Failed to produce default vertex shader.\n" ); + _glcompiler_error( OGL.defaultVertShader ); + } + + OGL.defaultProgram = glCreateProgram(); + glBindAttribLocation( OGL.defaultProgram, 0, "aPosition" ); + glBindAttribLocation( OGL.defaultProgram, 1, "aTexCoord" ); + glAttachShader( OGL.defaultProgram, OGL.defaultFragShader ); + glAttachShader( OGL.defaultProgram, OGL.defaultVertShader ); + glLinkProgram( OGL.defaultProgram ); + glGetProgramiv( OGL.defaultProgram, GL_LINK_STATUS, &success ); + if( !success ) + { + LOG( LOG_ERROR, "Failed to link default program.\n" ); + _glcompiler_error( OGL.defaultFragShader ); /* NOTE(review): passes a shader id for a link failure; the program's own info log is not queried here */ + } + glUseProgram( OGL.defaultProgram ); /* glUniform* writes to the program currently in use, so bind it before setting uTex */ + glUniform1i( glGetUniformLocation( OGL.defaultProgram, "uTex" ), 0 ); + +} +/* Recomputes OGL.scaleX/scaleY as the framebuffer size divided by the VI size. */ +void OGL_UpdateScale() +{ + OGL.scaleX = (float)config.framebuffer.width / (float)VI.width; + OGL.scaleY = (float)config.framebuffer.height / (float)VI.height; +} +/* Stores the new rectangle in both config.window and config.framebuffer, refreshes the scale factors and resets the GL viewport to match. */ +void OGL_ResizeWindow(int x, int y, int width, int height) +{ + config.window.xpos = x; + config.window.ypos = y; + config.window.width = width; + config.window.height = height; + + config.framebuffer.xpos = x; + config.framebuffer.ypos = y; + config.framebuffer.width = width; + config.framebuffer.height = height; + OGL_UpdateScale(); + + glViewport(config.framebuffer.xpos, config.framebuffer.ypos, + config.framebuffer.width, config.framebuffer.height); +}
+ +////// paulscode, added for SDL linkage +#ifdef USE_SDL +bool OGL_SDL_Start() +{ + /* Initialize SDL */ + LOG(LOG_MINIMAL, "Initializing SDL video subsystem...\n" ); + if (SDL_InitSubSystem( SDL_INIT_VIDEO ) == -1) + { + LOG(LOG_ERROR, "Error initializing SDL video subsystem: %s\n", SDL_GetError() ); + return FALSE; + } +/*SEB* + int current_w = config.window.width; + int current_h = config.window.height; +*/ + int current_w = 800; + int current_h = 480; + /* Set the video mode */ + LOG(LOG_MINIMAL, "Setting video mode %dx%d...\n", current_w, current_h ); + +// TODO: I should actually check what the pixelformat is, rather than assuming 16 bpp (RGB_565) or 32 bpp (RGBA_8888): +//// paulscode, added for switching between modes RGBA8888 and RGB565 +// (part of the color banding fix) +int bitsPP; +/*if( Android_JNI_UseRGBA8888() ) + bitsPP = 32; +else*/ + bitsPP = 16; +//// + + // TODO: Replace SDL_SetVideoMode with something that is SDL 2.0 compatible + // Better yet, eliminate all SDL calls by using the Mupen64Plus core api + if (!(OGL.hScreen = SDL_SetVideoMode( current_w, current_h, bitsPP, SDL_HWSURFACE ))) + { + LOG(LOG_ERROR, "Problem setting videomode %dx%d: %s\n", current_w, current_h, SDL_GetError() ); + SDL_QuitSubSystem( SDL_INIT_VIDEO ); + return FALSE; + } + +//// paulscode, fixes the screen-size problem + const float ratio = ( config.romPAL ? 
9.0f/11.0f : 0.75f ); + int videoWidth = config.window.refwidth; + int videoHeight = config.window.refheight; + int x = 0; + int y = 0; + + //re-scale width and height on per-rom basis + float width = /*(float)videoWidth * (float)config.window.refwidth /*/ 800.f; + float height = /*(float)videoHeight * (float)config.window.refheight / */480.f; + + if (!config.stretchVideo) { +/* if ((float)videoWith*480.0f/(float)videoHeight/800.0f>1.0f) { + //scale by Width + } else { + //scale by Height + }*/ + videoWidth = (int) (height / ratio); + if (videoWidth > width) { + videoWidth = width; + videoHeight = (int) (width * ratio); + } + } else { + videoWidth=800; + videoHeight=480; + } + x = (width - videoWidth) / 2; + y = (height - videoHeight) / 2; + + //set xpos and ypos + config.window.xpos = x; + config.window.ypos = y; + config.framebuffer.xpos = x; + config.framebuffer.ypos = y; + + //set width and height + config.window.width = (int)videoWidth; + config.window.height = (int)videoHeight; + config.framebuffer.width = (int)videoWidth; + config.framebuffer.height = (int)videoHeight; + + EGL_Open(800, 480); +//// + return true; +} +#endif +////// + +#ifdef USE_SDL +void Android_JNI_SwapWindow() +{ + EGL_SwapBuffers(); +} +#endif + + +bool OGL_Start() +{ +// paulscode, initialize SDL +#ifdef USE_SDL + if (!OGL_SDL_Start()) + return false; +#endif +// + + OGL_InitStates(); + +#ifdef USE_SDL +/////// paulscode, graphics bug-fixes + float depth = gDP.fillColor.z ; + glDisable( GL_SCISSOR_TEST ); + glDepthMask( GL_TRUE ); // fixes side-bar graphics glitches +// glClearDepthf( depth ); // broken on Qualcomm Adreno + glClearDepthf( 1.0f ); // fixes missing graphics on Qualcomm Adreno + glClearColor( 0, 0, 0, 1 ); + glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT ); + glFinish(); + Android_JNI_SwapWindow(); // paulscode, fix for black-screen bug + glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT ); + glFinish(); + OGL_UpdateDepthUpdate(); + glEnable( GL_SCISSOR_TEST ); 
+//////// +#endif + + //create framebuffer + if (config.framebuffer.enable) + { + LOG(LOG_VERBOSE, "Create offscreen framebuffer. \n"); + if (config.framebuffer.width == config.window.width && config.framebuffer.height == config.window.height) + { + LOG(LOG_WARNING, "There's no point in using a offscreen framebuffer when the window and screen dimensions are the same\n"); + } + + glGenFramebuffers(1, &OGL.framebuffer.fb); + glGenRenderbuffers(1, &OGL.framebuffer.depth_buffer); + glGenTextures(1, &OGL.framebuffer.color_buffer); + glBindRenderbuffer(GL_RENDERBUFFER, OGL.framebuffer.depth_buffer); + glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT24_OES, config.framebuffer.width, config.framebuffer.height); + glBindTexture(GL_TEXTURE_2D, OGL.framebuffer.color_buffer); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, config.framebuffer.width, config.framebuffer.height, 0, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, NULL); + glBindFramebuffer(GL_FRAMEBUFFER, OGL.framebuffer.fb); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, OGL.framebuffer.color_buffer, 0); + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, OGL.framebuffer.depth_buffer); + + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + + if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) + { + LOG(LOG_ERROR, "Incomplete Framebuffer Object: "); + switch(glCheckFramebufferStatus(GL_FRAMEBUFFER)) + { + case GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT: + printf("Incomplete Attachment. \n"); break; + case GL_FRAMEBUFFER_UNSUPPORTED: + printf("Framebuffer Unsupported. \n"); break; + case GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS: + printf("Incomplete Dimensions. 
\n"); break; + } + config.framebuffer.enable = 0; + glBindFramebuffer(GL_FRAMEBUFFER, 0); + } + } + + //check extensions + if ((config.texture.maxAnisotropy>0) && !OGL_IsExtSupported("GL_EXT_texture_filter_anistropic")) + { + LOG(LOG_WARNING, "Anistropic Filtering is not supported.\n"); + config.texture.maxAnisotropy = 0; + } + + float f = 0; + glGetFloatv(GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT, &f); + if (config.texture.maxAnisotropy > ((int)f)) + { + LOG(LOG_WARNING, "Clamping max anistropy to %ix.\n", (int)f); + config.texture.maxAnisotropy = (int)f; + } + + //Print some info + LOG(LOG_VERBOSE, "Width: %i Height:%i \n", config.framebuffer.width, config.framebuffer.height); + LOG(LOG_VERBOSE, "[gles2n64]: Enable Runfast... \n"); + + OGL_EnableRunfast(); + OGL_UpdateScale(); + + //We must have a shader bound before binding any textures: + ShaderCombiner_Init(); + ShaderCombiner_Set(EncodeCombineMode(0, 0, 0, TEXEL0, 0, 0, 0, TEXEL0, 0, 0, 0, TEXEL0, 0, 0, 0, TEXEL0)); + ShaderCombiner_Set(EncodeCombineMode(0, 0, 0, SHADE, 0, 0, 0, 1, 0, 0, 0, SHADE, 0, 0, 0, 1)); + + TextureCache_Init(); + + memset(OGL.triangles.vertices, 0, VERTBUFF_SIZE * sizeof(SPVertex)); + memset(OGL.triangles.elements, 0, ELEMBUFF_SIZE * sizeof(GLubyte)); + OGL.triangles.num = 0; + +#ifdef __TRIBUFFER_OPT + __indexmap_init(); +#endif + + OGL.frameSkipped = 0; + for(int i = 0; i < OGL_FRAMETIME_NUM; i++) OGL.frameTime[i] = 0; + + OGL.renderingToTexture = false; + OGL.renderState = RS_NONE; + gSP.changed = gDP.changed = 0xFFFFFFFF; + VI.displayNum = 0; + glGetError(); + + return TRUE; +} + +void OGL_Stop() +{ + LOG(LOG_MINIMAL, "Stopping OpenGL\n"); + +#ifdef USE_SDL + EGL_Close(); + SDL_QuitSubSystem( SDL_INIT_VIDEO ); +#endif + + if (config.framebuffer.enable) + { + glDeleteFramebuffers(1, &OGL.framebuffer.fb); + glDeleteTextures(1, &OGL.framebuffer.color_buffer); + glDeleteRenderbuffers(1, &OGL.framebuffer.depth_buffer); + } + + glDeleteShader(OGL.defaultFragShader); + 
glDeleteShader(OGL.defaultVertShader); + glDeleteProgram(OGL.defaultProgram); + + ShaderCombiner_Destroy(); + TextureCache_Destroy(); +} + +void OGL_UpdateCullFace() +{ + if (config.enableFaceCulling && (gSP.geometryMode & G_CULL_BOTH)) + { + glEnable( GL_CULL_FACE ); + if ((gSP.geometryMode & G_CULL_BACK) && (gSP.geometryMode & G_CULL_FRONT)) + glCullFace(GL_FRONT_AND_BACK); + else if (gSP.geometryMode & G_CULL_BACK) + glCullFace(GL_BACK); + else + glCullFace(GL_FRONT); + } + else + glDisable(GL_CULL_FACE); +} + +void OGL_UpdateViewport() +{ + int x, y, w, h; + x = config.framebuffer.xpos + (int)(gSP.viewport.x * OGL.scaleX); + y = config.framebuffer.ypos + (int)((VI.height - (gSP.viewport.y + gSP.viewport.height)) * OGL.scaleY); + w = (int)(gSP.viewport.width * OGL.scaleX); + h = (int)(gSP.viewport.height * OGL.scaleY); + + glViewport(x, y, w, h); +} + +void OGL_UpdateDepthUpdate() +{ + if (gDP.otherMode.depthUpdate) + glDepthMask(GL_TRUE); + else + glDepthMask(GL_FALSE); +} + +void OGL_UpdateScissor() +{ + int x, y, w, h; + x = config.framebuffer.xpos + (int)(gDP.scissor.ulx * OGL.scaleX); + y = config.framebuffer.ypos + (int)((VI.height - gDP.scissor.lry) * OGL.scaleY); + w = (int)((gDP.scissor.lrx - gDP.scissor.ulx) * OGL.scaleX); + h = (int)((gDP.scissor.lry - gDP.scissor.uly) * OGL.scaleY); + glScissor(x, y, w, h); +} + +//copied from RICE VIDEO +void OGL_SetBlendMode() +{ + + u32 blender = gDP.otherMode.l >> 16; + u32 blendmode_1 = blender&0xcccc; + u32 blendmode_2 = blender&0x3333; + + glEnable(GL_BLEND); + switch(gDP.otherMode.cycleType) + { + case G_CYC_FILL: + glDisable(GL_BLEND); + break; + + case G_CYC_COPY: + glBlendFunc(GL_ONE, GL_ZERO); + break; + + case G_CYC_2CYCLE: + if (gDP.otherMode.forceBlender && gDP.otherMode.depthCompare) + { + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + break; + } + + switch(blendmode_1+blendmode_2) + { + case BLEND_PASS+(BLEND_PASS>>2): // In * 0 + In * 1 + case BLEND_FOG_APRIM+(BLEND_PASS>>2): + case 
BLEND_FOG_MEM_FOG_MEM + (BLEND_OPA>>2): + case BLEND_FOG_APRIM + (BLEND_OPA>>2): + case BLEND_FOG_ASHADE + (BLEND_OPA>>2): + case BLEND_BI_AFOG + (BLEND_OPA>>2): + case BLEND_FOG_ASHADE + (BLEND_NOOP>>2): + case BLEND_NOOP + (BLEND_OPA>>2): + case BLEND_NOOP4 + (BLEND_NOOP>>2): + case BLEND_FOG_ASHADE+(BLEND_PASS>>2): + case BLEND_FOG_3+(BLEND_PASS>>2): + glDisable(GL_BLEND); + break; + + case BLEND_PASS+(BLEND_OPA>>2): + if (gDP.otherMode.cvgXAlpha && gDP.otherMode.alphaCvgSel) + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + else + glDisable(GL_BLEND); + break; + + case BLEND_PASS + (BLEND_XLU>>2): + case BLEND_FOG_ASHADE + (BLEND_XLU>>2): + case BLEND_FOG_APRIM + (BLEND_XLU>>2): + case BLEND_FOG_MEM_FOG_MEM + (BLEND_PASS>>2): + case BLEND_XLU + (BLEND_XLU>>2): + case BLEND_BI_AFOG + (BLEND_XLU>>2): + case BLEND_XLU + (BLEND_FOG_MEM_IN_MEM>>2): + case BLEND_PASS + (BLEND_FOG_MEM_IN_MEM>>2): + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + break; + + case BLEND_FOG_ASHADE+0x0301: + glBlendFunc(GL_SRC_ALPHA, GL_ZERO); + break; + + case 0x0c08+0x1111: + glBlendFunc(GL_ZERO, GL_DST_ALPHA); + break; + + default: + if (blendmode_2 == (BLEND_PASS>>2)) + glDisable(GL_BLEND); + else + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + break; + } + break; + + default: + + if (gDP.otherMode.forceBlender && gDP.otherMode.depthCompare && blendmode_1 != BLEND_FOG_ASHADE ) + { + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + break; + } + + switch (blendmode_1) + { + case BLEND_XLU: + case BLEND_BI_AIN: + case BLEND_FOG_MEM: + case BLEND_FOG_MEM_IN_MEM: + case BLEND_BLENDCOLOR: + case 0x00c0: + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + break; + + case BLEND_MEM_ALPHA_IN: + glBlendFunc(GL_ZERO, GL_DST_ALPHA); + break; + + case BLEND_OPA: + //if( options.enableHackForGames == HACK_FOR_MARIO_TENNIS ) + //{ + // glBlendFunc(BLEND_SRCALPHA, BLEND_INVSRCALPHA); + //} + + glDisable(GL_BLEND); + break; + + case BLEND_PASS: + case BLEND_NOOP: + case 
BLEND_FOG_ASHADE: + case BLEND_FOG_MEM_3: + case BLEND_BI_AFOG: + glDisable(GL_BLEND); + break; + + case BLEND_FOG_APRIM: + glBlendFunc(GL_ONE_MINUS_SRC_ALPHA, GL_ZERO); + break; + + case BLEND_NOOP3: + case BLEND_NOOP5: + case BLEND_MEM: + glBlendFunc(GL_ZERO, GL_ONE); + break; + + default: + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + } + } + +} + +void OGL_UpdateStates() +{ + if (gDP.otherMode.cycleType == G_CYC_COPY) + ShaderCombiner_Set(EncodeCombineMode(0, 0, 0, TEXEL0, 0, 0, 0, TEXEL0, 0, 0, 0, TEXEL0, 0, 0, 0, TEXEL0)); + else if (gDP.otherMode.cycleType == G_CYC_FILL) + ShaderCombiner_Set(EncodeCombineMode(0, 0, 0, SHADE, 0, 0, 0, 1, 0, 0, 0, SHADE, 0, 0, 0, 1)); + else + ShaderCombiner_Set(gDP.combine.mux); + +#ifdef SHADER_TEST + ProgramSwaps += scProgramChanged; +#endif + + if (gSP.changed & CHANGED_GEOMETRYMODE) + { + OGL_UpdateCullFace(); + + if (gSP.geometryMode & G_ZBUFFER) + glEnable(GL_DEPTH_TEST); + else + glDisable(GL_DEPTH_TEST); + + } + + if (gDP.changed & CHANGED_CONVERT) + { + SC_SetUniform1f(uK4, gDP.convert.k4); + SC_SetUniform1f(uK5, gDP.convert.k5); + } + + if (gDP.changed & CHANGED_RENDERMODE || gDP.changed & CHANGED_CYCLETYPE) + { + if (gDP.otherMode.cycleType == G_CYC_1CYCLE || gDP.otherMode.cycleType == G_CYC_2CYCLE) + { + //glDepthFunc((gDP.otherMode.depthCompare) ? GL_GEQUAL : GL_ALWAYS); + glDepthFunc((gDP.otherMode.depthCompare) ? GL_LESS : GL_ALWAYS); + glDepthMask((gDP.otherMode.depthUpdate) ? GL_TRUE : GL_FALSE); + + if (gDP.otherMode.depthMode == ZMODE_DEC) + glEnable(GL_POLYGON_OFFSET_FILL); + else + glDisable(GL_POLYGON_OFFSET_FILL); + } + else + { + glDepthFunc(GL_ALWAYS); + glDepthMask(GL_FALSE); + } + } + + if ((gDP.changed & CHANGED_BLENDCOLOR) || (gDP.changed & CHANGED_RENDERMODE)) + SC_SetUniform1f(uAlphaRef, (gDP.otherMode.cvgXAlpha) ? 
0.5f : gDP.blendColor.a); + + if (gDP.changed & CHANGED_SCISSOR) + OGL_UpdateScissor(); + + if (gSP.changed & CHANGED_VIEWPORT) + OGL_UpdateViewport(); + + if (gSP.changed & CHANGED_FOGPOSITION) + { + SC_SetUniform1f(uFogMultiplier, (float) gSP.fog.multiplier / 255.0f); + SC_SetUniform1f(uFogOffset, (float) gSP.fog.offset / 255.0f); + } + + if (gSP.changed & CHANGED_TEXTURESCALE) + { + if (scProgramCurrent->usesT0 || scProgramCurrent->usesT1) + SC_SetUniform2f(uTexScale, gSP.texture.scales, gSP.texture.scalet); + } + + if ((gSP.changed & CHANGED_TEXTURE) || (gDP.changed & CHANGED_TILE) || (gDP.changed & CHANGED_TMEM)) + { + //For some reason updating the texture cache on the first frame of LOZ:OOT causes a NULL Pointer exception... + if (scProgramCurrent) + { + if (scProgramCurrent->usesT0) + { +#ifdef TEXTURECACHE_TEST + unsigned t = ticksGetTicks(); + TextureCache_Update(0); + TextureCacheTime += (ticksGetTicks() - t); +#else + TextureCache_Update(0); +#endif + SC_ForceUniform2f(uTexOffset[0], gSP.textureTile[0]->fuls, gSP.textureTile[0]->fult); + SC_ForceUniform2f(uCacheShiftScale[0], cache.current[0]->shiftScaleS, cache.current[0]->shiftScaleT); + SC_ForceUniform2f(uCacheScale[0], cache.current[0]->scaleS, cache.current[0]->scaleT); + SC_ForceUniform2f(uCacheOffset[0], cache.current[0]->offsetS, cache.current[0]->offsetT); + } + //else TextureCache_ActivateDummy(0); + + //Note: enabling dummies makes some F-zero X textures flicker.... strange. 
+ + if (scProgramCurrent->usesT1) + { +#ifdef TEXTURECACHE_TEST + unsigned t = ticksGetTicks(); + TextureCache_Update(1); + TextureCacheTime += (ticksGetTicks() - t); +#else + TextureCache_Update(1); +#endif + SC_ForceUniform2f(uTexOffset[1], gSP.textureTile[1]->fuls, gSP.textureTile[1]->fult); + SC_ForceUniform2f(uCacheShiftScale[1], cache.current[1]->shiftScaleS, cache.current[1]->shiftScaleT); + SC_ForceUniform2f(uCacheScale[1], cache.current[1]->scaleS, cache.current[1]->scaleT); + SC_ForceUniform2f(uCacheOffset[1], cache.current[1]->offsetS, cache.current[1]->offsetT); + } + //else TextureCache_ActivateDummy(1); + } + } + + if ((gDP.changed & CHANGED_FOGCOLOR) && config.enableFog) + SC_SetUniform4fv(uFogColor, &gDP.fogColor.r ); + + if (gDP.changed & CHANGED_ENV_COLOR) + SC_SetUniform4fv(uEnvColor, &gDP.envColor.r); + + if (gDP.changed & CHANGED_PRIM_COLOR) + { + SC_SetUniform4fv(uPrimColor, &gDP.primColor.r); + SC_SetUniform1f(uPrimLODFrac, gDP.primColor.l); + } + + if ((gDP.changed & CHANGED_RENDERMODE) || (gDP.changed & CHANGED_CYCLETYPE)) + { +#ifndef OLD_BLENDMODE + OGL_SetBlendMode(); +#else + if ((gDP.otherMode.forceBlender) && + (gDP.otherMode.cycleType != G_CYC_COPY) && + (gDP.otherMode.cycleType != G_CYC_FILL) && + !(gDP.otherMode.alphaCvgSel)) + { + glEnable( GL_BLEND ); + + switch (gDP.otherMode.l >> 16) + { + case 0x0448: // Add + case 0x055A: + glBlendFunc( GL_ONE, GL_ONE ); + break; + case 0x0C08: // 1080 Sky + case 0x0F0A: // Used LOTS of places + glBlendFunc( GL_ONE, GL_ZERO ); + break; + + case 0x0040: // Fzero + case 0xC810: // Blends fog + case 0xC811: // Blends fog + case 0x0C18: // Standard interpolated blend + case 0x0C19: // Used for antialiasing + case 0x0050: // Standard interpolated blend + case 0x0055: // Used for antialiasing + glBlendFunc( GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA ); + break; + + case 0x0FA5: // Seems to be doing just blend color - maybe combiner can be used for this? 
+ case 0x5055: // Used in Paper Mario intro, I'm not sure if this is right... + glBlendFunc( GL_ZERO, GL_ONE ); + break; + + default: + LOG(LOG_VERBOSE, "Unhandled blend mode=%x", gDP.otherMode.l >> 16); + glBlendFunc( GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA ); + break; + } + } + else + { + glDisable( GL_BLEND ); + } + + if (gDP.otherMode.cycleType == G_CYC_FILL) + { + glBlendFunc( GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA ); + glEnable( GL_BLEND ); + } +#endif + } + + gDP.changed &= CHANGED_TILE | CHANGED_TMEM; + gSP.changed &= CHANGED_TEXTURE | CHANGED_MATRIX; +} + +void OGL_DrawTriangle(SPVertex *vertices, int v0, int v1, int v2) +{ + +} + +void OGL_AddTriangle(int v0, int v1, int v2) +{ + OGL.triangles.elements[OGL.triangles.num++] = v0; + OGL.triangles.elements[OGL.triangles.num++] = v1; + OGL.triangles.elements[OGL.triangles.num++] = v2; +} + +void OGL_SetColorArray() +{ + if (scProgramCurrent->usesCol) + glEnableVertexAttribArray(SC_COLOR); + else + glDisableVertexAttribArray(SC_COLOR); +} + +void OGL_SetTexCoordArrays() +{ + if (scProgramCurrent->usesT0) + glEnableVertexAttribArray(SC_TEXCOORD0); + else + glDisableVertexAttribArray(SC_TEXCOORD0); + + if (scProgramCurrent->usesT1) + glEnableVertexAttribArray(SC_TEXCOORD1); + else + glDisableVertexAttribArray(SC_TEXCOORD1); +} + +void OGL_DrawTriangles() +{ + if (OGL.renderingToTexture && config.ignoreOffscreenRendering) + { + OGL.triangles.num = 0; + return; + } + + if (OGL.triangles.num == 0) return; + + if ((config.updateMode == SCREEN_UPDATE_AT_1ST_PRIMITIVE) && OGL.screenUpdate) + OGL_SwapBuffers(); + + if (gSP.changed || gDP.changed) + OGL_UpdateStates(); + + if (OGL.renderState != RS_TRIANGLE || scProgramChanged) + { + OGL_SetColorArray(); + OGL_SetTexCoordArrays(); + glDisableVertexAttribArray(SC_TEXCOORD1); + SC_ForceUniform1f(uRenderState, RS_TRIANGLE); + } + + if (OGL.renderState != RS_TRIANGLE) + { +#ifdef RENDERSTATE_TEST + StateChanges++; +#endif + glVertexAttribPointer(SC_POSITION, 4, GL_FLOAT, 
GL_FALSE, sizeof(SPVertex), &OGL.triangles.vertices[0].x);
+        glVertexAttribPointer(SC_COLOR, 4, GL_FLOAT, GL_FALSE, sizeof(SPVertex), &OGL.triangles.vertices[0].r);
+        glVertexAttribPointer(SC_TEXCOORD0, 2, GL_FLOAT, GL_FALSE, sizeof(SPVertex), &OGL.triangles.vertices[0].s);
+
+        OGL_UpdateCullFace();
+        OGL_UpdateViewport();
+        glEnable(GL_SCISSOR_TEST);
+        OGL.renderState = RS_TRIANGLE;
+    }
+
+    // flush the queued indices in one call and reset the queue
+    glDrawElements(GL_TRIANGLES, OGL.triangles.num, GL_UNSIGNED_BYTE, OGL.triangles.elements);
+    OGL.triangles.num = 0;
+
+#ifdef __TRIBUFFER_OPT
+    __indexmap_clear();
+#endif
+}
+
+// Draw one line between vertices v0 and v1 of OGL.triangles.vertices.
+// width is multiplied by OGL.scaleX before being passed to glLineWidth.
+void OGL_DrawLine(int v0, int v1, float width )
+{
+    if (OGL.renderingToTexture && config.ignoreOffscreenRendering) return;
+
+    if ((config.updateMode == SCREEN_UPDATE_AT_1ST_PRIMITIVE) && OGL.screenUpdate)
+        OGL_SwapBuffers();
+
+    if (gSP.changed || gDP.changed)
+        OGL_UpdateStates();
+
+    // re-point the vertex arrays only when coming from another primitive
+    // type or after a program switch
+    if (OGL.renderState != RS_LINE || scProgramChanged)
+    {
+#ifdef RENDERSTATE_TEST
+        StateChanges++;
+#endif
+        OGL_SetColorArray();
+        glDisableVertexAttribArray(SC_TEXCOORD0);
+        glDisableVertexAttribArray(SC_TEXCOORD1);
+        glVertexAttribPointer(SC_POSITION, 4, GL_FLOAT, GL_FALSE, sizeof(SPVertex), &OGL.triangles.vertices[0].x);
+        glVertexAttribPointer(SC_COLOR, 4, GL_FLOAT, GL_FALSE, sizeof(SPVertex), &OGL.triangles.vertices[0].r);
+
+        SC_ForceUniform1f(uRenderState, RS_LINE);
+        OGL_UpdateCullFace();
+        OGL_UpdateViewport();
+        OGL.renderState = RS_LINE;
+    }
+
+    unsigned short elem[2];
+    elem[0] = v0;
+    elem[1] = v1;
+    glLineWidth( width * OGL.scaleX );
+    glDrawElements(GL_LINES, 2, GL_UNSIGNED_SHORT, elem);
+}
+
+// Draw a solid rectangle covering (ulx,uly)-(lrx,lry) inclusive with a
+// constant colour. color points at four floats handed to glVertexAttrib4fv.
+// Scissoring and culling are disabled for the draw and the full framebuffer
+// viewport is used; the scissor test and game viewport are restored afterwards.
+void OGL_DrawRect( int ulx, int uly, int lrx, int lry, float *color)
+{
+    if (OGL.renderingToTexture && config.ignoreOffscreenRendering) return;
+
+    if ((config.updateMode == SCREEN_UPDATE_AT_1ST_PRIMITIVE) && OGL.screenUpdate)
+        OGL_SwapBuffers();
+
+    if (gSP.changed || gDP.changed)
+        OGL_UpdateStates();
+
+    if (OGL.renderState != RS_RECT || scProgramChanged)
+    {
+        glDisableVertexAttribArray(SC_COLOR);
+        glDisableVertexAttribArray(SC_TEXCOORD0);
+        glDisableVertexAttribArray(SC_TEXCOORD1);
+        SC_ForceUniform1f(uRenderState, RS_RECT);
+    }
+
+    if (OGL.renderState != RS_RECT)
+    {
+#ifdef RENDERSTATE_TEST
+        StateChanges++;
+#endif
+        // constant z/w for the attribute; x/y come from OGL.rect via the
+        // two-component pointer below
+        glVertexAttrib4f(SC_POSITION, 0, 0, gSP.viewport.nearz, 1.0);
+        glVertexAttribPointer(SC_POSITION, 2, GL_FLOAT, GL_FALSE, sizeof(GLVertex), &OGL.rect[0].x);
+        OGL.renderState = RS_RECT;
+    }
+
+    glViewport(config.framebuffer.xpos, config.framebuffer.ypos, config.framebuffer.width, config.framebuffer.height );
+    glDisable(GL_SCISSOR_TEST);
+    glDisable(GL_CULL_FACE);
+
+    // map VI coordinates to clip space [-1,1]; y is negated
+    // (VI.rwidth / VI.rheight are presumably 1/width and 1/height -- TODO confirm)
+    OGL.rect[0].x = (float) ulx * (2.0f * VI.rwidth) - 1.0;
+    OGL.rect[0].y = (float) uly * (-2.0f * VI.rheight) + 1.0;
+    OGL.rect[1].x = (float) (lrx+1) * (2.0f * VI.rwidth) - 1.0;
+    OGL.rect[1].y = OGL.rect[0].y;
+    OGL.rect[2].x = OGL.rect[0].x;
+    OGL.rect[2].y = (float) (lry+1) * (-2.0f * VI.rheight) + 1.0;
+    OGL.rect[3].x = OGL.rect[1].x;
+    OGL.rect[3].y = OGL.rect[2].y;
+
+    glVertexAttrib4fv(SC_COLOR, color);
+    glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+    glEnable(GL_SCISSOR_TEST);
+    OGL_UpdateViewport();
+
+}
+
+// Draw a textured rectangle from (ulx,uly) to (lrx,lry) with texture
+// coordinates running from (uls,ult) to (lrs,lrt). When flip is set the
+// s/t assignments of the two off-diagonal corners are exchanged.
+void OGL_DrawTexturedRect( float ulx, float uly, float lrx, float lry, float uls, float ult, float lrs, float lrt, bool flip )
+{
+    // game-specific hack: skip 8-bit colour-indexed rects as wide as the
+    // colour image
+    if (config.hackBanjoTooie)
+    {
+        if (gDP.textureImage.width == gDP.colorImage.width &&
+            gDP.textureImage.format == G_IM_FMT_CI &&
+            gDP.textureImage.size == G_IM_SIZ_8b)
+        {
+            return;
+        }
+    }
+
+    if (OGL.renderingToTexture && config.ignoreOffscreenRendering) return;
+
+    if ((config.updateMode == SCREEN_UPDATE_AT_1ST_PRIMITIVE) && OGL.screenUpdate)
+        OGL_SwapBuffers();
+
+    if (gSP.changed || gDP.changed)
+        OGL_UpdateStates();
+
+    if (OGL.renderState != RS_TEXTUREDRECT || scProgramChanged)
+    {
+        glDisableVertexAttribArray(SC_COLOR);
+        OGL_SetTexCoordArrays();
+        SC_ForceUniform1f(uRenderState, RS_TEXTUREDRECT);
+    }
+
+    if (OGL.renderState != RS_TEXTUREDRECT)
+    {
+#ifdef RENDERSTATE_TEST
+        StateChanges++;
+#endif
+        glVertexAttrib4f(SC_COLOR, 0, 0, 0, 0);
+        // depth comes from the primitive depth register when selected,
+        // otherwise from the viewport's near plane
+        glVertexAttrib4f(SC_POSITION, 0, 0, (gDP.otherMode.depthSource == G_ZS_PRIM) ? gDP.primDepth.z : gSP.viewport.nearz, 1.0);
+        glVertexAttribPointer(SC_POSITION, 2, GL_FLOAT, GL_FALSE, sizeof(GLVertex), &OGL.rect[0].x);
+        glVertexAttribPointer(SC_TEXCOORD0, 2, GL_FLOAT, GL_FALSE, sizeof(GLVertex), &OGL.rect[0].s0);
+        glVertexAttribPointer(SC_TEXCOORD1, 2, GL_FLOAT, GL_FALSE, sizeof(GLVertex), &OGL.rect[0].s1);
+        OGL.renderState = RS_TEXTUREDRECT;
+    }
+
+    glViewport(config.framebuffer.xpos, config.framebuffer.ypos, config.framebuffer.width, config.framebuffer.height);
+    glDisable(GL_CULL_FACE);
+
+    // unlike OGL_DrawRect, the lower-right corner is used as given (no +1)
+    OGL.rect[0].x = (float) ulx * (2.0f * VI.rwidth) - 1.0f;
+    OGL.rect[0].y = (float) uly * (-2.0f * VI.rheight) + 1.0f;
+    OGL.rect[1].x = (float) (lrx) * (2.0f * VI.rwidth) - 1.0f;
+    OGL.rect[1].y = OGL.rect[0].y;
+    OGL.rect[2].x = OGL.rect[0].x;
+    OGL.rect[2].y = (float) (lry) * (-2.0f * VI.rheight) + 1.0f;
+    OGL.rect[3].x = OGL.rect[1].x;
+    OGL.rect[3].y = OGL.rect[2].y;
+
+    // texture 0: compute s/t for corners 0 and 3; corners 1 and 2 are derived
+    // from them further down
+    if (scProgramCurrent->usesT0 && cache.current[0] && gSP.textureTile[0])
+    {
+        OGL.rect[0].s0 = uls * cache.current[0]->shiftScaleS - gSP.textureTile[0]->fuls;
+        OGL.rect[0].t0 = ult * cache.current[0]->shiftScaleT - gSP.textureTile[0]->fult;
+        OGL.rect[3].s0 = (lrs + 1.0f) * cache.current[0]->shiftScaleS - gSP.textureTile[0]->fuls;
+        OGL.rect[3].t0 = (lrt + 1.0f) * cache.current[0]->shiftScaleT - gSP.textureTile[0]->fult;
+
+        // masked, non-mirrored axis starting on a whole multiple of the
+        // texture size: rebase so the start coordinate is 0
+        if ((cache.current[0]->maskS) && !(cache.current[0]->mirrorS) && (fmod( OGL.rect[0].s0, cache.current[0]->width ) == 0.0f))
+        {
+            OGL.rect[3].s0 -= OGL.rect[0].s0;
+            OGL.rect[0].s0 = 0.0f;
+        }
+
+        if ((cache.current[0]->maskT) && !(cache.current[0]->mirrorT) && (fmod( OGL.rect[0].t0, cache.current[0]->height ) == 0.0f))
+        {
+            OGL.rect[3].t0 -= OGL.rect[0].t0;
+            OGL.rect[0].t0 = 0.0f;
+        }
+
+        // when the range stays inside the texture, clamp to the edge
+        glActiveTexture( GL_TEXTURE0);
+        if ((OGL.rect[0].s0 >= 0.0f) && (OGL.rect[3].s0 <= cache.current[0]->width))
+            glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
+
+        if ((OGL.rect[0].t0 >= 0.0f) && (OGL.rect[3].t0 <= cache.current[0]->height))
+            glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
+
+        OGL.rect[0].s0 *= cache.current[0]->scaleS;
+        OGL.rect[0].t0 *= cache.current[0]->scaleT;
+        OGL.rect[3].s0 *= cache.current[0]->scaleS;
+        OGL.rect[3].t0 *= cache.current[0]->scaleT;
+    }
+
+    // texture 1: same procedure
+    // NOTE(review): the clamp test here uses "== 0.0f" where the texture 0
+    // path uses ">= 0.0f" -- confirm whether the difference is intended.
+    if (scProgramCurrent->usesT1 && cache.current[1] && gSP.textureTile[1])
+    {
+        OGL.rect[0].s1 = uls * cache.current[1]->shiftScaleS - gSP.textureTile[1]->fuls;
+        OGL.rect[0].t1 = ult * cache.current[1]->shiftScaleT - gSP.textureTile[1]->fult;
+        OGL.rect[3].s1 = (lrs + 1.0f) * cache.current[1]->shiftScaleS - gSP.textureTile[1]->fuls;
+        OGL.rect[3].t1 = (lrt + 1.0f) * cache.current[1]->shiftScaleT - gSP.textureTile[1]->fult;
+
+        if ((cache.current[1]->maskS) && (fmod( OGL.rect[0].s1, cache.current[1]->width ) == 0.0f) && !(cache.current[1]->mirrorS))
+        {
+            OGL.rect[3].s1 -= OGL.rect[0].s1;
+            OGL.rect[0].s1 = 0.0f;
+        }
+
+        if ((cache.current[1]->maskT) && (fmod( OGL.rect[0].t1, cache.current[1]->height ) == 0.0f) && !(cache.current[1]->mirrorT))
+        {
+            OGL.rect[3].t1 -= OGL.rect[0].t1;
+            OGL.rect[0].t1 = 0.0f;
+        }
+
+        glActiveTexture( GL_TEXTURE1);
+        if ((OGL.rect[0].s1 == 0.0f) && (OGL.rect[3].s1 <= cache.current[1]->width))
+            glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
+
+        if ((OGL.rect[0].t1 == 0.0f) && (OGL.rect[3].t1 <= cache.current[1]->height))
+            glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
+
+        OGL.rect[0].s1 *= cache.current[1]->scaleS;
+        OGL.rect[0].t1 *= cache.current[1]->scaleT;
+        OGL.rect[3].s1 *= cache.current[1]->scaleS;
+        OGL.rect[3].t1 *= cache.current[1]->scaleT;
+    }
+
+    // copy-mode rectangles are sampled unfiltered unless bilinear is forced
+    if ((gDP.otherMode.cycleType == G_CYC_COPY) && !config.texture.forceBilinear)
+    {
+        glActiveTexture(GL_TEXTURE0);
+        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST );
+        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST );
+    }
+
+    // fill in corners 1 and 2 from corners 0 and 3
+    if (flip)
+    {
+        OGL.rect[1].s0 = OGL.rect[0].s0;
+        OGL.rect[1].t0 = OGL.rect[3].t0;
+        OGL.rect[1].s1 = OGL.rect[0].s1;
+        OGL.rect[1].t1 = OGL.rect[3].t1;
+        OGL.rect[2].s0 = OGL.rect[3].s0;
+        OGL.rect[2].t0 = OGL.rect[0].t0;
+        OGL.rect[2].s1 = OGL.rect[3].s1;
+        OGL.rect[2].t1 = OGL.rect[0].t1;
+    }
+    else
+    {
+        OGL.rect[1].s0 = OGL.rect[3].s0;
+        OGL.rect[1].t0 = OGL.rect[0].t0;
+        OGL.rect[1].s1 = OGL.rect[3].s1;
+        OGL.rect[1].t1 = OGL.rect[0].t1;
+        OGL.rect[2].s0 = OGL.rect[0].s0;
+        OGL.rect[2].t0 = OGL.rect[3].t0;
+        OGL.rect[2].s1 = OGL.rect[0].s1;
+        OGL.rect[2].t1 = OGL.rect[3].t1;
+    }
+
+    glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+    OGL_UpdateViewport();
+}
+
+// Clear the depth buffer to 1.0.
+// NOTE(review): the glClear below also includes GL_COLOR_BUFFER_BIT, so the
+// colour buffer is cleared to opaque black as well -- confirm this is wanted.
+void OGL_ClearDepthBuffer()
+{
+    if (OGL.renderingToTexture && config.ignoreOffscreenRendering) return;
+
+    if ((config.updateMode == SCREEN_UPDATE_AT_1ST_PRIMITIVE) && OGL.screenUpdate)
+        OGL_SwapBuffers();
+
+    //float depth = 1.0 - (gDP.fillColor.z / ((float)0x3FFF)); // broken on OMAP3
+    // depth is only referenced by the commented-out glClearDepthf call below
+    float depth = gDP.fillColor.z ;
+
+/////// paulscode, graphics bug-fixes
+    glDisable( GL_SCISSOR_TEST );
+    glDepthMask( GL_TRUE ); // fixes side-bar graphics glitches
+//    glClearDepthf( depth ); // broken on Qualcomm Adreno
+    glClearDepthf( 1.0f ); // fixes missing graphics on Qualcomm Adreno
+    glClearColor( 0, 0, 0, 1 );
+    glClear( GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT );
+    // restore the depth-write mask requested by the game
+    OGL_UpdateDepthUpdate();
+    glEnable( GL_SCISSOR_TEST );
+////////
+}
+
+// Clear the colour buffer to color (four floats: r, g, b, a). The scissor box
+// is widened to the whole framebuffer for the clear and then restored.
+void OGL_ClearColorBuffer( float *color )
+{
+    if (OGL.renderingToTexture && config.ignoreOffscreenRendering) return;
+
+    if ((config.updateMode == SCREEN_UPDATE_AT_1ST_PRIMITIVE) && OGL.screenUpdate)
+        OGL_SwapBuffers();
+
+    glScissor(config.framebuffer.xpos, config.framebuffer.ypos, config.framebuffer.width, config.framebuffer.height);
+    glClearColor( color[0], color[1], color[2], color[3] );
+    glClear( GL_COLOR_BUFFER_BIT );
+    OGL_UpdateScissor();
+
+}
+
+// Fetch one pending GL error and print its name to stdout.
+// Returns 1 if an error was pending, 0 otherwise.
+int OGL_CheckError()
+{
+    GLenum e = glGetError();
+    if (e != GL_NO_ERROR)
+    {
+        printf("GL Error: ");
+        switch(e)
+        {
+            case
GL_INVALID_ENUM: printf("INVALID ENUM"); break;
+            case GL_INVALID_VALUE: printf("INVALID VALUE"); break;
+            case GL_INVALID_OPERATION: printf("INVALID OPERATION"); break;
+            case GL_OUT_OF_MEMORY: printf("OUT OF MEMORY"); break;
+        }
+        printf("\n");
+        return 1;
+    }
+    return 0;
+}
+
+// Shift the frame-time history by one slot and store the ticks elapsed since
+// the previous call in OGL.frameTime[0].
+void OGL_UpdateFrameTime()
+{
+    unsigned ticks = ticksGetTicks();
+    static unsigned lastFrameTicks = 0;
+    for(int i = OGL_FRAMETIME_NUM-1; i > 0; i--) OGL.frameTime[i] = OGL.frameTime[i-1];
+    OGL.frameTime[0] = ticks - lastFrameTicks;
+    lastFrameTicks = ticks;
+}
+
+// Present the finished frame. With the offscreen framebuffer enabled its
+// colour texture is first drawn to the window as a full-screen quad using
+// OGL.defaultProgram; afterwards the offscreen target, viewport and combiner
+// program are re-bound. Optionally clears the buffers for the next frame.
+void OGL_SwapBuffers()
+{
+    //OGL_DrawTriangles();
+    scProgramChanged = 0;
+#if 0
+    static int frames = 0;
+    static unsigned lastTicks = 0;
+    unsigned ticks = ticksGetTicks();
+
+    frames++;
+    if (ticks >= (lastTicks + 1000))
+    {
+
+        float fps = 1000.0f * (float) frames / (ticks - lastTicks);
+        LOG(LOG_MINIMAL, "fps = %.2f \n", fps);
+        LOG(LOG_MINIMAL, "skipped frame = %i of %i \n", OGL.frameSkipped, frames + OGL.frameSkipped);
+
+        OGL.frameSkipped = 0;
+
+#ifdef BATCH_TEST
+        LOG(LOG_MINIMAL, "time spent in draw calls per frame = %.2f ms\n", (float)TotalDrawTime / frames);
+        LOG(LOG_MINIMAL, "average draw calls per frame = %.0f\n", (float)TotalDrawCalls / frames);
+        LOG(LOG_MINIMAL, "average vertices per draw call = %.2f\n", (float)TotalTriangles / TotalDrawCalls);
+        TotalDrawCalls = 0;
+        TotalTriangles = 0;
+        TotalDrawTime = 0;
+#endif
+
+#ifdef SHADER_TEST
+        LOG(LOG_MINIMAL, "average shader changes per frame = %f\n", (float)ProgramSwaps / frames);
+        ProgramSwaps = 0;
+#endif
+
+#ifdef TEXTURECACHE_TEST
+        LOG(LOG_MINIMAL, "texture cache time per frame: %.2f ms\n", (float)TextureCacheTime/ frames);
+        LOG(LOG_MINIMAL, "texture cache per frame: hits=%.2f misses=%.2f\n", (float)cache.hits / frames,
+                (float)cache.misses / frames);
+        cache.hits = cache.misses = 0;
+        TextureCacheTime = 0;
+
+#endif
+        frames = 0;
+        lastTicks = ticks;
+    }
+#endif
+
+
+#ifdef PROFILE_GBI
+    // dump and reset the GBI profile every 5000 ticks
+    u32 profileTicks = ticksGetTicks();
+    static u32 profileLastTicks = 0;
+    if (profileTicks >= (profileLastTicks + 5000))
+    {
+        LOG(LOG_MINIMAL, "GBI PROFILE DATA: %i ms \n", profileTicks - profileLastTicks);
+        LOG(LOG_MINIMAL, "=========================================================\n");
+        GBI_ProfilePrint(stdout);
+        LOG(LOG_MINIMAL, "=========================================================\n");
+        GBI_ProfileReset();
+        profileLastTicks = profileTicks;
+    }
+#endif
+
+    if (config.framebuffer.enable)
+    {
+        // switch to the window surface and draw the offscreen colour texture
+        glBindFramebuffer(GL_FRAMEBUFFER, 0);
+        glClearColor( 0, 0, 0, 1 );
+        glClear( GL_COLOR_BUFFER_BIT );
+
+        glUseProgram(OGL.defaultProgram);
+        glDisable(GL_SCISSOR_TEST);
+        glDisable(GL_DEPTH_TEST);
+        glViewport(config.window.xpos, config.window.ypos, config.window.width, config.window.height);
+
+        // interleaved quad: x, y, s, t per vertex
+        static const float vert[] =
+        {
+            -1.0, -1.0, +0.0, +0.0,
+            +1.0, -1.0, +1.0, +0.0,
+            -1.0, +1.0, +0.0, +1.0,
+            +1.0, +1.0, +1.0, +1.0
+        };
+
+        glActiveTexture(GL_TEXTURE0);
+        glBindTexture(GL_TEXTURE_2D, OGL.framebuffer.color_buffer);
+        if (config.framebuffer.bilinear)
+        {
+            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+        }
+        else
+        {
+            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+        }
+
+        // attribute 0 = position, attribute 1 = texture coordinate
+        glEnableVertexAttribArray(0);
+        glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (float*)vert);
+        glEnableVertexAttribArray(1);
+        glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (float*)vert + 2);
+        glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+
+        Android_JNI_SwapWindow(); // paulscode, fix for black-screen bug
+
+        // go back to rendering into the offscreen target; the vertex arrays
+        // were changed above, so force the next draw to set them up again
+        glBindFramebuffer(GL_FRAMEBUFFER, OGL.framebuffer.fb);
+        OGL_UpdateViewport();
+        if (scProgramCurrent) glUseProgram(scProgramCurrent->program);
+        OGL.renderState = RS_NONE;
+    }
+    else
+    {
+        Android_JNI_SwapWindow(); // paulscode, fix for black-screen bug
+    }
+
+    // if emulator defined a render callback function, call it.
+    // NOTE(review): this used to read "call it before buffer swap", but both
+    // swap paths above have already run by this point -- confirm the intended
+    // order.
+    if (renderCallback) (*renderCallback)();
+
+    OGL.screenUpdate = false;
+
+    if (config.forceBufferClear)
+    {
+/////// paulscode, graphics bug-fixes
+        // depth is only referenced by the commented-out glClearDepthf call below
+        float depth = gDP.fillColor.z ;
+        glDisable( GL_SCISSOR_TEST );
+        glDepthMask( GL_TRUE ); // fixes side-bar graphics glitches
+//        glClearDepthf( depth ); // broken on Qualcomm Adreno
+        glClearDepthf( 1.0f ); // fixes missing graphics on Qualcomm Adreno
+        glClearColor( 0, 0, 0, 1 );
+        glClear( GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT );
+        OGL_UpdateDepthUpdate();
+        glEnable( GL_SCISSOR_TEST );
+///////
+    }
+
+}
+
+// Report the framebuffer size through width/height (each may be NULL) and,
+// when dest is not NULL, copy the framebuffer rectangle into it as RGBA
+// bytes using glReadPixels.
+void OGL_ReadScreen( void *dest, int *width, int *height )
+{
+    if (width)
+        *width = config.framebuffer.width;
+    if (height)
+        *height = config.framebuffer.height;
+
+    // size-only query
+    if (dest == NULL)
+        return;
+
+    glReadPixels( config.framebuffer.xpos, config.framebuffer.ypos,
+                  config.framebuffer.width, config.framebuffer.height,
+                  GL_RGBA, GL_UNSIGNED_BYTE, dest );
+}
+
diff --git a/source/gles2n64/src/OpenGL.h b/source/gles2n64/src/OpenGL.h
new file mode 100644
index 0000000..3db0743
--- /dev/null
+++ b/source/gles2n64/src/OpenGL.h
@@ -0,0 +1,171 @@
+#ifndef OPENGL_H
+#define OPENGL_H
+
+#include
+#include
+#include "gSP.h"
+
+#ifdef USE_SDL
+// #include // Android 2.3 only
+// #include
+    #include
+#endif
+
+#ifndef min
+#define min(a,b) ((a) < (b) ? (a) : (b))
+#endif
+#ifndef max
+#define max(a,b) ((a) > (b) ?
(a) : (b)) +#endif + +#define RS_NONE 0 +#define RS_TRIANGLE 1 +#define RS_RECT 2 +#define RS_TEXTUREDRECT 3 +#define RS_LINE 4 + + +#define SCREEN_UPDATE_AT_VI_UPDATE 1 +#define SCREEN_UPDATE_AT_VI_CHANGE 2 +#define SCREEN_UPDATE_AT_CI_CHANGE 3 +#define SCREEN_UPDATE_AT_1ST_CI_CHANGE 4 +#define SCREEN_UPDATE_AT_1ST_PRIMITIVE 5 +#define SCREEN_UPDATE_BEFORE_SCREEN_CLEAR 6 +#define SCREEN_UPDATE_AT_VI_UPDATE_AND_DRAWN 7 + + +#define BLEND_NOOP 0x0000 +#define BLEND_NOOP5 0xcc48 // Fog * 0 + Mem * 1 +#define BLEND_NOOP4 0xcc08 // Fog * 0 + In * 1 +#define BLEND_FOG_ASHADE 0xc800 +#define BLEND_FOG_3 0xc000 // Fog * AIn + In * 1-A +#define BLEND_FOG_MEM 0xc440 // Fog * AFog + Mem * 1-A +#define BLEND_FOG_APRIM 0xc400 // Fog * AFog + In * 1-A +#define BLEND_BLENDCOLOR 0x8c88 +#define BLEND_BI_AFOG 0x8400 // Bl * AFog + In * 1-A +#define BLEND_BI_AIN 0x8040 // Bl * AIn + Mem * 1-A +#define BLEND_MEM 0x4c40 // Mem*0 + Mem*(1-0)?! +#define BLEND_FOG_MEM_3 0x44c0 // Mem * AFog + Fog * 1-A +#define BLEND_NOOP3 0x0c48 // In * 0 + Mem * 1 +#define BLEND_PASS 0x0c08 // In * 0 + In * 1 +#define BLEND_FOG_MEM_IN_MEM 0x0440 // In * AFog + Mem * 1-A +#define BLEND_FOG_MEM_FOG_MEM 0x04c0 // In * AFog + Fog * 1-A +#define BLEND_OPA 0x0044 // In * AIn + Mem * AMem +#define BLEND_XLU 0x0040 +#define BLEND_MEM_ALPHA_IN 0x4044 // Mem * AIn + Mem * AMem + + +#define OGL_FRAMETIME_NUM 8 + +struct GLVertex +{ + float x, y, z, w; + struct + { + float r, g, b, a; + } color, secondaryColor; + float s0, t0, s1, t1; +}; + +struct GLcolor +{ + float r, g, b, a; +}; + +struct GLInfo +{ +#ifdef USE_SDL +// TODO: More EGL stuff, need to do this in Java + SDL_Surface *hScreen; // TODO: Do we really need this? Only using it in one place AFAICT.. 
+/* + struct + { + EGLint version_major, version_minor; + EGLDisplay display; + EGLContext context; + EGLConfig config; + EGLSurface surface; + EGLNativeDisplayType device; + EGLNativeWindowType handle; + } EGL; +*/ +#endif + + bool screenUpdate; + + struct + { + GLuint fb,depth_buffer, color_buffer; + } framebuffer; + + + int frameSkipped; + unsigned consecutiveSkips; + unsigned frameTime[OGL_FRAMETIME_NUM]; + + int frame_vsync, frame_actual, frame_dl; + int frame_prevdl; + int mustRenderDlist; + int renderingToTexture; + + + GLint defaultProgram; + GLint defaultVertShader; + GLint defaultFragShader; + + float scaleX, scaleY; + +#define INDEXMAP_SIZE 64 +#define VERTBUFF_SIZE 256 +#define ELEMBUFF_SIZE 1024 + + struct { + SPVertex vertices[VERTBUFF_SIZE]; + GLubyte elements[ELEMBUFF_SIZE]; + int num; + +//#ifdef __TRIBUFFER_OPT + + u32 indexmap[INDEXMAP_SIZE]; + u32 indexmapinv[VERTBUFF_SIZE]; + u32 indexmap_prev; + u32 indexmap_nomap; +//#endif + + } triangles; + + + unsigned int renderState; + + GLVertex rect[4]; +}; + +extern GLInfo OGL; + +bool OGL_Start(); +void OGL_Stop(); + +void OGL_AddTriangle(int v0, int v1, int v2); +void OGL_DrawTriangles(); +void OGL_DrawTriangle(SPVertex *vertices, int v0, int v1, int v2); +void OGL_DrawLine(int v0, int v1, float width); +void OGL_DrawRect(int ulx, int uly, int lrx, int lry, float *color); +void OGL_DrawTexturedRect(float ulx, float uly, float lrx, float lry, float uls, float ult, float lrs, float lrt, bool flip ); + +void OGL_UpdateFrameTime(); +void OGL_UpdateScale(); +void OGL_UpdateStates(); +void OGL_UpdateViewport(); +void OGL_UpdateScissor(); +void OGL_UpdateCullFace(); + +void OGL_ClearDepthBuffer(); +void OGL_ClearColorBuffer(float *color); +void OGL_ResizeWindow(int x, int y, int width, int height); +void OGL_SwapBuffers(); +void OGL_ReadScreen( void *dest, int *width, int *height ); + +int OGL_CheckError(); +int OGL_IsExtSupported( const char *extension ); +#endif + diff --git a/source/gles2n64/src/RDP.cpp 
b/source/gles2n64/src/RDP.cpp new file mode 100644 index 0000000..d3ffdd9 --- /dev/null +++ b/source/gles2n64/src/RDP.cpp @@ -0,0 +1,347 @@ +#include "N64.h" +#include "RSP.h" +#include "GBI.h" +#include "gDP.h" +#include "Types.h" +#include "Debug.h" +#include "Common.h" +#include "gSP.h" + +void RDP_Unknown( u32 w0, u32 w1 ) +{ +} + +void RDP_NoOp( u32 w0, u32 w1 ) +{ + gSPNoOp(); +} + +void RDP_SetCImg( u32 w0, u32 w1 ) +{ + gDPSetColorImage( _SHIFTR( w0, 21, 3 ), // fmt + _SHIFTR( w0, 19, 2 ), // siz + _SHIFTR( w0, 0, 12 ) + 1, // width + w1 ); // img +} + +void RDP_SetZImg( u32 w0, u32 w1 ) +{ + gDPSetDepthImage( w1 ); // img +} + +void RDP_SetTImg( u32 w0, u32 w1 ) +{ + gDPSetTextureImage( _SHIFTR( w0, 21, 3), // fmt + _SHIFTR( w0, 19, 2 ), // siz + _SHIFTR( w0, 0, 12 ) + 1, // width + w1 ); // img +} + +void RDP_SetCombine( u32 w0, u32 w1 ) +{ + gDPSetCombine( _SHIFTR( w0, 0, 24 ), // muxs0 + w1 ); // muxs1 +} + +void RDP_SetEnvColor( u32 w0, u32 w1 ) +{ + gDPSetEnvColor( _SHIFTR( w1, 24, 8 ), // r + _SHIFTR( w1, 16, 8 ), // g + _SHIFTR( w1, 8, 8 ), // b + _SHIFTR( w1, 0, 8 ) ); // a +} + +void RDP_SetPrimColor( u32 w0, u32 w1 ) +{ + gDPSetPrimColor( _SHIFTR( w0, 8, 8 ), // m (w0 bits 15..8; extract with _SHIFTR, _SHIFTL is the packing macro) + _SHIFTR( w0, 0, 8 ), // l (w0 bits 7..0) + _SHIFTR( w1, 24, 8 ), // r + _SHIFTR( w1, 16, 8 ), // g + _SHIFTR( w1, 8, 8 ), // b + _SHIFTR( w1, 0, 8 ) ); // a + +} + +void RDP_SetBlendColor( u32 w0, u32 w1 ) +{ + gDPSetBlendColor( _SHIFTR( w1, 24, 8 ), // r + _SHIFTR( w1, 16, 8 ), // g + _SHIFTR( w1, 8, 8 ), // b + _SHIFTR( w1, 0, 8 ) ); // a +} + +void RDP_SetFogColor( u32 w0, u32 w1 ) +{ + gDPSetFogColor( _SHIFTR( w1, 24, 8 ), // r + _SHIFTR( w1, 16, 8 ), // g + _SHIFTR( w1, 8, 8 ), // b + _SHIFTR( w1, 0, 8 ) ); // a +} + +void RDP_SetFillColor( u32 w0, u32 w1 ) +{ + gDPSetFillColor( w1 ); +} + +void RDP_FillRect( u32 w0, u32 w1 ) +{ + gDPFillRectangle( _SHIFTR( w1, 14, 10 ), // ulx + _SHIFTR( w1, 2, 10 ), // uly + _SHIFTR( w0, 14, 10 ), // lrx + _SHIFTR( w0, 2, 10 ) ); // lry +} + +void
RDP_SetTile( u32 w0, u32 w1 ) +{ + + gDPSetTile( _SHIFTR( w0, 21, 3 ), // fmt + _SHIFTR( w0, 19, 2 ), // siz + _SHIFTR( w0, 9, 9 ), // line + _SHIFTR( w0, 0, 9 ), // tmem + _SHIFTR( w1, 24, 3 ), // tile + _SHIFTR( w1, 20, 4 ), // palette + _SHIFTR( w1, 18, 2 ), // cmt + _SHIFTR( w1, 8, 2 ), // cms + _SHIFTR( w1, 14, 4 ), // maskt + _SHIFTR( w1, 4, 4 ), // masks + _SHIFTR( w1, 10, 4 ), // shiftt + _SHIFTR( w1, 0, 4 ) ); // shifts +} + +void RDP_LoadTile( u32 w0, u32 w1 ) +{ + gDPLoadTile( _SHIFTR( w1, 24, 3 ), // tile + _SHIFTR( w0, 12, 12 ), // uls + _SHIFTR( w0, 0, 12 ), // ult + _SHIFTR( w1, 12, 12 ), // lrs + _SHIFTR( w1, 0, 12 ) ); // lrt +} + +void RDP_LoadBlock( u32 w0, u32 w1 ) +{ + gDPLoadBlock( _SHIFTR( w1, 24, 3 ), // tile + _SHIFTR( w0, 12, 12 ), // uls + _SHIFTR( w0, 0, 12 ), // ult + _SHIFTR( w1, 12, 12 ), // lrs + _SHIFTR( w1, 0, 12 ) ); // dxt +} + +void RDP_SetTileSize( u32 w0, u32 w1 ) +{ + gDPSetTileSize( _SHIFTR( w1, 24, 3 ), // tile + _SHIFTR( w0, 12, 12 ), // uls + _SHIFTR( w0, 0, 12 ), // ult + _SHIFTR( w1, 12, 12 ), // lrs + _SHIFTR( w1, 0, 12 ) ); // lrt +} + +void RDP_LoadTLUT( u32 w0, u32 w1 ) +{ + gDPLoadTLUT( _SHIFTR( w1, 24, 3 ), // tile + _SHIFTR( w0, 12, 12 ), // uls + _SHIFTR( w0, 0, 12 ), // ult + _SHIFTR( w1, 12, 12 ), // lrs + _SHIFTR( w1, 0, 12 ) ); // lrt +} + +void RDP_SetOtherMode( u32 w0, u32 w1 ) +{ + gDPSetOtherMode( _SHIFTR( w0, 0, 24 ), // mode0 + w1 ); // mode1 +} + +void RDP_SetPrimDepth( u32 w0, u32 w1 ) +{ + gDPSetPrimDepth( _SHIFTR( w1, 16, 16 ), // z + _SHIFTR( w1, 0, 16 ) ); // dz +} + +void RDP_SetScissor( u32 w0, u32 w1 ) +{ + gDPSetScissor( _SHIFTR( w1, 24, 2 ), // mode + _FIXED2FLOAT( _SHIFTR( w0, 12, 12 ), 2 ), // ulx + _FIXED2FLOAT( _SHIFTR( w0, 0, 12 ), 2 ), // uly + _FIXED2FLOAT( _SHIFTR( w1, 12, 12 ), 2 ), // lrx + _FIXED2FLOAT( _SHIFTR( w1, 0, 12 ), 2 ) ); // lry +} + +void RDP_SetConvert( u32 w0, u32 w1 ) +{ + gDPSetConvert( _SHIFTR( w0, 13, 9 ), // k0 + _SHIFTR( w0, 4, 9 ), // k1 + _SHIFTL( w0, 5, 4 ) | 
_SHIFTR( w1, 27, 5 ), // k2 (9 bits: high 4 in w0[3:0], low 5 in w1[31:27]) + _SHIFTR( w1, 18, 9 ), // k3 + _SHIFTR( w1, 9, 9 ), // k4 + _SHIFTR( w1, 0, 9 ) ); // k5 +} + +void RDP_SetKeyR( u32 w0, u32 w1 ) +{ + gDPSetKeyR( _SHIFTR( w1, 8, 8 ), // cR + _SHIFTR( w1, 0, 8 ), // sR + _SHIFTR( w1, 16, 12 ) ); // wR +} + +void RDP_SetKeyGB( u32 w0, u32 w1 ) +{ + gDPSetKeyGB( _SHIFTR( w1, 24, 8 ), // cG + _SHIFTR( w1, 16, 8 ), // sG + _SHIFTR( w0, 12, 12 ), // wG + _SHIFTR( w1, 8, 8 ), // cB + _SHIFTR( w1, 0, 8 ), // SB + _SHIFTR( w0, 0, 12 ) ); // wB +} + +void RDP_FullSync( u32 w0, u32 w1 ) +{ + gDPFullSync(); +} + +void RDP_TileSync( u32 w0, u32 w1 ) +{ +// gDPTileSync(); +} + +void RDP_PipeSync( u32 w0, u32 w1 ) +{ +// gDPPipeSync(); +} + +void RDP_LoadSync( u32 w0, u32 w1 ) +{ +// gDPLoadSync(); +} + +void RDP_TexRectFlip( u32 w0, u32 w1 ) +{ + u32 w2 = *(u32*)&RDRAM[RSP.PC[RSP.PCi] + 4]; + RSP.PC[RSP.PCi] += 8; + + u32 w3 = *(u32*)&RDRAM[RSP.PC[RSP.PCi] + 4]; + RSP.PC[RSP.PCi] += 8; + + gDPTextureRectangleFlip( _FIXED2FLOAT( _SHIFTR( w1, 12, 12 ), 2 ), // ulx + _FIXED2FLOAT( _SHIFTR( w1, 0, 12 ), 2 ), // uly + _FIXED2FLOAT( _SHIFTR( w0, 12, 12 ), 2 ), // lrx + _FIXED2FLOAT( _SHIFTR( w0, 0, 12 ), 2 ), // lry + _SHIFTR( w1, 24, 3 ), // tile + _FIXED2FLOAT( (s16)_SHIFTR( w2, 16, 16 ), 5 ), // s + _FIXED2FLOAT( (s16)_SHIFTR( w2, 0, 16 ), 5 ), // t + _FIXED2FLOAT( (s16)_SHIFTR( w3, 16, 16 ), 10 ), // dsdx + _FIXED2FLOAT( (s16)_SHIFTR( w3, 0, 16 ), 10 ) ); // dsdy +} + +void RDP_TexRect( u32 w0, u32 w1 ) +{ + u32 w2 = *(u32*)&RDRAM[RSP.PC[RSP.PCi] + 4]; + RSP.PC[RSP.PCi] += 8; + + u32 w3 = *(u32*)&RDRAM[RSP.PC[RSP.PCi] + 4]; + RSP.PC[RSP.PCi] += 8; + + gDPTextureRectangle( _FIXED2FLOAT( _SHIFTR( w1, 12, 12 ), 2 ), // ulx + _FIXED2FLOAT( _SHIFTR( w1, 0, 12 ), 2 ), // uly + _FIXED2FLOAT( _SHIFTR( w0, 12, 12 ), 2 ), // lrx + _FIXED2FLOAT( _SHIFTR( w0, 0, 12 ), 2 ), // lry + _SHIFTR( w1, 24, 3 ), // tile + _FIXED2FLOAT( (s16)_SHIFTR( w2, 16, 16 ), 5 ), // s + _FIXED2FLOAT( (s16)_SHIFTR( w2, 0, 16 ), 5 ), // t +
_FIXED2FLOAT( (s16)_SHIFTR( w3, 16, 16 ), 10 ), // dsdx + _FIXED2FLOAT( (s16)_SHIFTR( w3, 0, 16 ), 10 ) ); // dsdy +} + + +//Low Level RDP Drawing Commands: +void RDP_TriFill(u32 w0, u32 w1) +{ + LOG(LOG_VERBOSE, "RSP_TRI_FILL Command\n"); +} + +void RDP_TriFillZBuff(u32 w0, u32 w1) +{ + LOG(LOG_VERBOSE, "RSP_TRI_FILL_ZBUFF Command\n"); +} + +void RDP_TriTxtr(u32 w0, u32 w1) +{ + LOG(LOG_VERBOSE, "RSP_TRI_TXTR Command\n"); +} + +void RDP_TriTxtrZBuff(u32 w0, u32 w1) +{ + LOG(LOG_VERBOSE, "RSP_TRI_TXTR_ZBUFF Command\n"); +} + +void RDP_TriShade(u32 w0, u32 w1) +{ + LOG(LOG_VERBOSE, "RSP_TRI_SHADE Command\n"); +} + +void RDP_TriShadeZBuff(u32 w0, u32 w1) +{ + LOG(LOG_VERBOSE, "RSP_TRI_SHADE_ZBUFF Command\n"); +} + +void RDP_TriShadeTxtr(u32 w0, u32 w1) +{ + LOG(LOG_VERBOSE, "RSP_TRI_SHADE_TXTR Command\n"); +} + +void RDP_TriShadeTxtrZBuff(u32 w0, u32 w1) +{ + LOG(LOG_VERBOSE, "RSP_TRI_SHADE_TXTR_ZBUFF Command\n"); +} + +void RDP_Init() +{ + // Initialize RDP commands to RDP_UNKNOWN + for (int i = 0xC8; i <= 0xCF; i++) + GBI.cmd[i] = RDP_Unknown; + + // Initialize RDP commands to RDP_UNKNOWN + for (int i = 0xE4; i <= 0xFF; i++) + GBI.cmd[i] = RDP_Unknown; + + // Set known GBI commands + GBI.cmd[G_NOOP] = RDP_NoOp; + GBI.cmd[G_SETCIMG] = RDP_SetCImg; + GBI.cmd[G_SETZIMG] = RDP_SetZImg; + GBI.cmd[G_SETTIMG] = RDP_SetTImg; + GBI.cmd[G_SETCOMBINE] = RDP_SetCombine; + GBI.cmd[G_SETENVCOLOR] = RDP_SetEnvColor; + GBI.cmd[G_SETPRIMCOLOR] = RDP_SetPrimColor; + GBI.cmd[G_SETBLENDCOLOR] = RDP_SetBlendColor; + GBI.cmd[G_SETFOGCOLOR] = RDP_SetFogColor; + GBI.cmd[G_SETFILLCOLOR] = RDP_SetFillColor; + GBI.cmd[G_FILLRECT] = RDP_FillRect; + GBI.cmd[G_SETTILE] = RDP_SetTile; + GBI.cmd[G_LOADTILE] = RDP_LoadTile; + GBI.cmd[G_LOADBLOCK] = RDP_LoadBlock; + GBI.cmd[G_SETTILESIZE] = RDP_SetTileSize; + GBI.cmd[G_LOADTLUT] = RDP_LoadTLUT; + GBI.cmd[G_RDPSETOTHERMODE] = RDP_SetOtherMode; + GBI.cmd[G_SETPRIMDEPTH] = RDP_SetPrimDepth; + GBI.cmd[G_SETSCISSOR] = RDP_SetScissor; + 
GBI.cmd[G_SETCONVERT] = RDP_SetConvert; + GBI.cmd[G_SETKEYR] = RDP_SetKeyR; + GBI.cmd[G_SETKEYGB] = RDP_SetKeyGB; + GBI.cmd[G_RDPFULLSYNC] = RDP_FullSync; + GBI.cmd[G_RDPTILESYNC] = RDP_TileSync; + GBI.cmd[G_RDPPIPESYNC] = RDP_PipeSync; + GBI.cmd[G_RDPLOADSYNC] = RDP_LoadSync; + GBI.cmd[G_TEXRECTFLIP] = RDP_TexRectFlip; + GBI.cmd[G_TEXRECT] = RDP_TexRect; + + GBI.cmd[G_RDPNOOP] = RDP_NoOp; + + //Low Level RDP Drawing Commands (all eight triangle variants, 0xC8-0xCF): + GBI.cmd[G_TRI_FILL] = RDP_TriFill; + GBI.cmd[G_TRI_FILL_ZBUFF] = RDP_TriFillZBuff; + GBI.cmd[G_TRI_TXTR] = RDP_TriTxtr; + GBI.cmd[G_TRI_TXTR_ZBUFF] = RDP_TriTxtrZBuff; + GBI.cmd[G_TRI_SHADE] = RDP_TriShade; + GBI.cmd[G_TRI_SHADE_ZBUFF] = RDP_TriShadeZBuff; + GBI.cmd[G_TRI_SHADE_TXTR] = RDP_TriShadeTxtr; + GBI.cmd[G_TRI_SHADE_TXTR_ZBUFF] = RDP_TriShadeTxtrZBuff; +} + diff --git a/source/gles2n64/src/RDP.h b/source/gles2n64/src/RDP.h new file mode 100644 index 0000000..73e970b --- /dev/null +++ b/source/gles2n64/src/RDP.h @@ -0,0 +1,7 @@ +#ifndef RDP_H +#define RDP_H + +void RDP_Init(); + +#endif + diff --git a/source/gles2n64/src/RSP.cpp b/source/gles2n64/src/RSP.cpp new file mode 100644 index 0000000..cb4e7d3 --- /dev/null +++ b/source/gles2n64/src/RSP.cpp @@ -0,0 +1,150 @@ +#include +#include "Common.h" +#include "gles2N64.h" +#include "OpenGL.h" +#include "Debug.h" +#include "RSP.h" +#include "RDP.h" +#include "N64.h" +#include "F3D.h" +#include "3DMath.h" +#include "VI.h" +#include "ShaderCombiner.h" +#include "DepthBuffer.h" +#include "GBI.h" +#include "gSP.h" +#include "Textures.h" + +//#define PRINT_DISPLAYLIST +//#define PRINT_DISPLAYLIST_NUM 1 + +RSPInfo RSP; + +void RSP_LoadMatrix( f32 mtx[4][4], u32 address ) +{ + + f32 recip = 1.5258789e-05f; + + struct _N64Matrix + { + s16 integer[4][4]; + u16 fraction[4][4]; + } *n64Mat = (struct _N64Matrix *)&RDRAM[address]; + + for (int i = 0; i < 4; i++) + for (int j = 0; j < 4; j++) + mtx[i][j] = (GLfloat)(n64Mat->integer[i][j^1]) + (GLfloat)(n64Mat->fraction[i][j^1]) * recip; +} + +void RSP_ProcessDList() +{ + VI_UpdateSize(); +
OGL_UpdateScale(); + TextureCache_ActivateNoise(2); + + RSP.PC[0] = *(u32*)&DMEM[0x0FF0]; + RSP.PCi = 0; + RSP.count = 0; + + RSP.halt = FALSE; + RSP.busy = TRUE; + +#ifdef __TRIBUFFER_OPT + __indexmap_clear(); +#endif + + gSP.matrix.stackSize = min( 32, *(u32*)&DMEM[0x0FE4] >> 6 ); + gSP.matrix.modelViewi = 0; + gSP.changed |= CHANGED_MATRIX; + + for (int i = 0; i < 4; i++) + for (int j = 0; j < 4; j++) + gSP.matrix.modelView[0][i][j] = 0.0f; + + gSP.matrix.modelView[0][0][0] = 1.0f; + gSP.matrix.modelView[0][1][1] = 1.0f; + gSP.matrix.modelView[0][2][2] = 1.0f; + gSP.matrix.modelView[0][3][3] = 1.0f; + + u32 uc_start = *(u32*)&DMEM[0x0FD0]; + u32 uc_dstart = *(u32*)&DMEM[0x0FD8]; + u32 uc_dsize = *(u32*)&DMEM[0x0FDC]; + + if ((uc_start != RSP.uc_start) || (uc_dstart != RSP.uc_dstart)) + gSPLoadUcodeEx( uc_start, uc_dstart, uc_dsize ); + + gDPSetAlphaCompare(G_AC_NONE); + gDPSetDepthSource(G_ZS_PIXEL); + gDPSetRenderMode(0, 0); + gDPSetAlphaDither(G_AD_DISABLE); + gDPSetColorDither(G_CD_DISABLE); + gDPSetCombineKey(G_CK_NONE); + gDPSetTextureConvert(G_TC_FILT); + gDPSetTextureFilter(G_TF_POINT); + gDPSetTextureLUT(G_TT_NONE); + gDPSetTextureLOD(G_TL_TILE); + gDPSetTextureDetail(G_TD_CLAMP); + gDPSetTexturePersp(G_TP_PERSP); + gDPSetCycleType(G_CYC_1CYCLE); + gDPPipelineMode(G_PM_NPRIMITIVE); + +#ifdef PRINT_DISPLAYLIST + if ((RSP.DList%PRINT_DISPLAYLIST_NUM) == 0) LOG(LOG_VERBOSE, "BEGIN DISPLAY LIST %i \n", RSP.DList); +#endif + + while (!RSP.halt) + { + u32 pc = RSP.PC[RSP.PCi]; + + if ((pc > RDRAMSize) || ((RDRAMSize - pc) < 8)) // written as a subtraction so a huge pc cannot wrap past the check + { +#ifdef DEBUG + DebugMsg( DEBUG_LOW | DEBUG_ERROR, "ATTEMPTING TO EXECUTE RSP COMMAND AT INVALID RDRAM LOCATION\n" ); +#endif + break; + } + + + u32 w0 = *(u32*)&RDRAM[pc]; + u32 w1 = *(u32*)&RDRAM[pc+4]; + RSP.nextCmd = ((RDRAMSize - pc) >= 12) ? _SHIFTR( *(u32*)&RDRAM[pc+8], 24, 8 ) : 0; // peek only when the following word is still inside RDRAM + RSP.cmd = _SHIFTR( w0, 24, 8 ); + RSP.PC[RSP.PCi] += 8; + +#ifdef PROFILE_GBI + GBI_ProfileBegin(RSP.cmd); +#endif + +#ifdef PRINT_DISPLAYLIST + if ((RSP.DList%PRINT_DISPLAYLIST_NUM) == 0)
LOG(LOG_VERBOSE, "%s: w0=0x%x w1=0x%x\n", GBI_GetFuncName(GBI.current->type, RSP.cmd), w0, w1); +#endif + + GBI.cmd[RSP.cmd]( w0, w1 ); + +#ifdef PROFILE_GBI + GBI_ProfileEnd(RSP.cmd); +#endif + } + +#ifdef PRINT_DISPLAYLIST + if ((RSP.DList%PRINT_DISPLAYLIST_NUM) == 0) LOG(LOG_VERBOSE, "END DISPLAY LIST %i \n", RSP.DList); +#endif + + RSP.busy = FALSE; + RSP.DList++; + gSP.changed |= CHANGED_COLORBUFFER; +} + +void RSP_Init() +{ + RDRAMSize = 1024 * 1024 * 8; + RSP.DList = 0; + RSP.uc_start = RSP.uc_dstart = 0; + gDP.loadTile = &gDP.tiles[7]; + gSP.textureTile[0] = &gDP.tiles[0]; + gSP.textureTile[1] = &gDP.tiles[1]; + + DepthBuffer_Init(); + GBI_Init(); +} + diff --git a/source/gles2n64/src/RSP.h b/source/gles2n64/src/RSP.h new file mode 100644 index 0000000..33c705a --- /dev/null +++ b/source/gles2n64/src/RSP.h @@ -0,0 +1,34 @@ +#ifndef RSP_H +#define RSP_H + +#include "N64.h" +#include "GBI.h" +//#include "gSP.h" +#include "Types.h" + +#ifndef min +#define min(a,b) ((a) < (b) ? (a) : (b)) +#endif + +#define RSPMSG_CLOSE 0 +#define RSPMSG_UPDATESCREEN 1 +#define RSPMSG_PROCESSDLIST 2 +#define RSPMSG_CAPTURESCREEN 3 +#define RSPMSG_DESTROYTEXTURES 4 +#define RSPMSG_INITTEXTURES 5 + +typedef struct +{ + u32 PC[18], PCi, busy, halt, close, DList, uc_start, uc_dstart, cmd, nextCmd, count; +} RSPInfo; + +extern RSPInfo RSP; + +#define RSP_SegmentToPhysical( segaddr ) ((gSP.segment[((segaddr) >> 24) & 0x0F] + ((segaddr) & 0x00FFFFFF)) & 0x00FFFFFF) + +void RSP_Init(); +void RSP_ProcessDList(); +void RSP_LoadMatrix( f32 mtx[4][4], u32 address ); + +#endif + diff --git a/source/gles2n64/src/S2DEX.cpp b/source/gles2n64/src/S2DEX.cpp new file mode 100644 index 0000000..0131016 --- /dev/null +++ b/source/gles2n64/src/S2DEX.cpp @@ -0,0 +1,102 @@ +#include "OpenGL.h" +#include "S2DEX.h" +#include "F3D.h" +#include "F3DEX.h" +#include "GBI.h" +#include "gSP.h" +#include "gDP.h" +#include "RSP.h" +#include "Types.h" + +void S2DEX_BG_1Cyc( u32 w0, u32 w1 ) +{ + gSPBgRect1Cyc( w1 );
+} + +void S2DEX_BG_Copy( u32 w0, u32 w1 ) +{ + gSPBgRectCopy( w1 ); +} + +void S2DEX_Obj_Rectangle( u32 w0, u32 w1 ) +{ + gSPObjRectangle( w1 ); +} + +void S2DEX_Obj_Sprite( u32 w0, u32 w1 ) +{ + gSPObjSprite( w1 ); +} + +void S2DEX_Obj_MoveMem( u32 w0, u32 w1 ) +{ + if (_SHIFTR( w0, 0, 16 ) == 0) + gSPObjMatrix( w1 ); + else + gSPObjSubMatrix( w1 ); +} + +void S2DEX_Select_DL( u32 w0, u32 w1 ) +{ +} + +void S2DEX_Obj_RenderMode( u32 w0, u32 w1 ) +{ +} + +void S2DEX_Obj_Rectangle_R( u32 w0, u32 w1 ) +{ +} + +void S2DEX_Obj_LoadTxtr( u32 w0, u32 w1 ) +{ + gSPObjLoadTxtr( w1 ); +} + +void S2DEX_Obj_LdTx_Sprite( u32 w0, u32 w1 ) +{ + gSPObjLoadTxSprite( w1 ); +} + +void S2DEX_Obj_LdTx_Rect( u32 w0, u32 w1 ) +{ +} + +void S2DEX_Obj_LdTx_Rect_R( u32 w0, u32 w1 ) +{ + gSPObjLoadTxRectR( w1 ); +} + +void S2DEX_Init() +{ + // Set GeometryMode flags + GBI_InitFlags( F3DEX ); + + gSP.geometryMode = 0; + + GBI.PCStackSize = 18; + + // GBI Command Command Value Command Function + GBI_SetGBI( G_SPNOOP, F3D_SPNOOP, F3D_SPNoOp ); + GBI_SetGBI( G_BG_1CYC, S2DEX_BG_1CYC, S2DEX_BG_1Cyc ); + GBI_SetGBI( G_BG_COPY, S2DEX_BG_COPY, S2DEX_BG_Copy ); + GBI_SetGBI( G_OBJ_RECTANGLE, S2DEX_OBJ_RECTANGLE, S2DEX_Obj_Rectangle ); + GBI_SetGBI( G_OBJ_SPRITE, S2DEX_OBJ_SPRITE, S2DEX_Obj_Sprite ); + GBI_SetGBI( G_OBJ_MOVEMEM, S2DEX_OBJ_MOVEMEM, S2DEX_Obj_MoveMem ); + GBI_SetGBI( G_DL, F3D_DL, F3D_DList ); + GBI_SetGBI( G_SELECT_DL, S2DEX_SELECT_DL, S2DEX_Select_DL ); + GBI_SetGBI( G_OBJ_RENDERMODE, S2DEX_OBJ_RENDERMODE, S2DEX_Obj_RenderMode ); + GBI_SetGBI( G_OBJ_RECTANGLE_R, S2DEX_OBJ_RECTANGLE_R, S2DEX_Obj_Rectangle_R ); + GBI_SetGBI( G_OBJ_LOADTXTR, S2DEX_OBJ_LOADTXTR, S2DEX_Obj_LoadTxtr ); + GBI_SetGBI( G_OBJ_LDTX_SPRITE, S2DEX_OBJ_LDTX_SPRITE, S2DEX_Obj_LdTx_Sprite ); + GBI_SetGBI( G_OBJ_LDTX_RECT, S2DEX_OBJ_LDTX_RECT, S2DEX_Obj_LdTx_Rect ); + GBI_SetGBI( G_OBJ_LDTX_RECT_R, S2DEX_OBJ_LDTX_RECT_R, S2DEX_Obj_LdTx_Rect_R ); + GBI_SetGBI( G_MOVEWORD, F3D_MOVEWORD, F3D_MoveWord ); + GBI_SetGBI( 
G_SETOTHERMODE_H, F3D_SETOTHERMODE_H, F3D_SetOtherMode_H ); + GBI_SetGBI( G_SETOTHERMODE_L, F3D_SETOTHERMODE_L, F3D_SetOtherMode_L ); + GBI_SetGBI( G_ENDDL, F3D_ENDDL, F3D_EndDL ); + GBI_SetGBI( G_RDPHALF_1, F3D_RDPHALF_1, F3D_RDPHalf_1 ); + GBI_SetGBI( G_RDPHALF_2, F3D_RDPHALF_2, F3D_RDPHalf_2 ); + GBI_SetGBI( G_LOAD_UCODE, S2DEX_LOAD_UCODE, F3DEX_Load_uCode ); +} + diff --git a/source/gles2n64/src/S2DEX.h b/source/gles2n64/src/S2DEX.h new file mode 100644 index 0000000..03ae7ba --- /dev/null +++ b/source/gles2n64/src/S2DEX.h @@ -0,0 +1,219 @@ +#ifndef S2DEX_H +#define S2DEX_H + +#define G_BGLT_LOADBLOCK 0x0033 +#define G_BGLT_LOADTILE 0xfff4 + +#define G_BG_FLAG_FLIPS 0x01 +#define G_BG_FLAG_FLIPT 0x10 + +struct uObjScaleBg +{ + u16 imageW; /* Texture width (8-byte alignment, u10.2) */ + u16 imageX; /* x-coordinate of upper-left + position of texture (u10.5) */ + u16 frameW; /* Transfer destination frame width (u10.2) */ + s16 frameX; /* x-coordinate of upper-left + position of transfer destination frame (s10.2) */ + + u16 imageH; /* Texture height (u10.2) */ + u16 imageY; /* y-coordinate of upper-left position of + texture (u10.5) */ + u16 frameH; /* Transfer destination frame height (u10.2) */ + s16 frameY; /* y-coordinate of upper-left position of transfer + destination frame (s10.2) */ + + u32 imagePtr; /* Address of texture source in DRAM*/ + u8 imageSiz; /* Texel size + G_IM_SIZ_4b (4 bits/texel) + G_IM_SIZ_8b (8 bits/texel) + G_IM_SIZ_16b (16 bits/texel) + G_IM_SIZ_32b (32 bits/texel) */ + u8 imageFmt; /*Texel format + G_IM_FMT_RGBA (RGBA format) + G_IM_FMT_YUV (YUV format) + G_IM_FMT_CI (CI format) + G_IM_FMT_IA (IA format) + G_IM_FMT_I (I format) */ + u16 imageLoad; /* Method for loading the BG image texture + G_BGLT_LOADBLOCK (use LoadBlock) + G_BGLT_LOADTILE (use LoadTile) */ + u16 imageFlip; /* Image inversion on/off (horizontal + direction only) + 0 (normal display (no inversion)) + G_BG_FLAG_FLIPS (horizontal inversion of texture image) */ + u16 
imagePal; /* Position of palette for 4-bit color + index texture (4-bit precision, 0~15) */ + + u16 scaleH; /* y-direction scale value (u5.10) */ + u16 scaleW; /* x-direction scale value (u5.10) */ + s32 imageYorig; /* image drawing origin (s20.5)*/ + + u8 padding[4]; /* Padding */ +}; /* 40 bytes */ + +struct uObjBg +{ + u16 imageW; /* Texture width (8-byte alignment, u10.2) */ + u16 imageX; /* x-coordinate of upper-left position of texture (u10.5) */ + u16 frameW; /* Transfer destination frame width (u10.2) */ + s16 frameX; /* x-coordinate of upper-left position of + transfer destination frame (s10.2) */ + u16 imageH; /* Texture height (u10.2) */ + u16 imageY; /* y-coordinate of upper-left position of + texture (u10.5) */ + u16 frameH; /* Transfer destination frame height (u10.2) */ + s16 frameY; /* y-coordinate of upper-left position of + transfer destination frame (s10.2) */ + + u32 imagePtr; /* Address of texture source in DRAM*/ + u8 imageSiz; /* Texel size + G_IM_SIZ_4b (4 bits/texel) + G_IM_SIZ_8b (8 bits/texel) + G_IM_SIZ_16b (16 bits/texel) + G_IM_SIZ_32b (32 bits/texel) */ + u8 imageFmt; /*Texel format + G_IM_FMT_RGBA (RGBA format) + G_IM_FMT_YUV (YUV format) + G_IM_FMT_CI (CI format) + G_IM_FMT_IA (IA format) + G_IM_FMT_I (I format) */ + u16 imageLoad; /* Method for loading the BG image texture + G_BGLT_LOADBLOCK (use LoadBlock) + G_BGLT_LOADTILE (use LoadTile) */ + u16 imageFlip; /* Image inversion on/off (horizontal direction only) + 0 (normal display (no inversion)) + G_BG_FLAG_FLIPS (horizontal inversion of + texture image) */ + u16 imagePal; /* Position of palette for 4-bit color + index texture (4-bit precision, 0~15) */ + +/* The following is set in the initialization routine guS2DInitBg */ + u16 tmemH; /* TMEM height for a single load (quadruple + value, s13.2) */ + u16 tmemW; /* TMEM width for one frame line (word size) */ + u16 tmemLoadTH; /* TH value or Stride value */ + u16 tmemLoadSH; /* SH value */ + u16 tmemSize; /* imagePtr skip value 
for a single load */ + u16 tmemSizeW; /* imagePtr skip value for one image line */ +}; /* 40 bytes */ + +struct uObjSprite +{ + u16 scaleW; /* Width-direction scaling (u5.10) */ + s16 objX; /* x-coordinate of upper-left corner of OBJ (s10.2) */ + u16 paddingX; /* Unused (always 0) */ + u16 imageW; /* Texture width (length in s direction, u10.5) */ + u16 scaleH; /* Height-direction scaling (u5.10) */ + s16 objY; /* y-coordinate of upper-left corner of OBJ (s10.2) */ + u16 paddingY; /* Unused (always 0) */ + u16 imageH; /* Texture height (length in t direction, u10.5) */ + u16 imageAdrs; /* Texture starting position in TMEM (In units of 64-bit words) */ + u16 imageStride; /* Texel wrapping width (In units of 64-bit words) */ + u8 imageFlags; /* Display flag + (*) More than one of the following flags can be specified as the bit sum of the flags: + 0 (Normal display (no inversion)) + G_OBJ_FLAG_FLIPS (s-direction (x) inversion) + G_OBJ_FLAG_FLIPT (t-direction (y) inversion) */ + u8 imagePal; /* Position of palette for 4-bit color index texture (4-bit precision, 0~7) */ + u8 imageSiz; /* Texel size + G_IM_SIZ_4b (4 bits/texel) + G_IM_SIZ_8b (8 bits/texel) + G_IM_SIZ_16b (16 bits/texel) + G_IM_SIZ_32b (32 bits/texel) */ + u8 imageFmt; /* Texel format + G_IM_FMT_RGBA (RGBA format) + G_IM_FMT_YUV (YUV format) + G_IM_FMT_CI (CI format) + G_IM_FMT_IA (IA format) + G_IM_FMT_I (I format) */ +}; /* 24 bytes */ + +struct uObjTxtrBlock +{ + u32 type; /* Structure identifier (G_OBJLT_TXTRBLOCK) */ + u32 image; /* Texture source address in DRAM (8-byte alignment) */ + u16 tsize; /* Texture size (specified by GS_TB_TSIZE) */ + u16 tmem; /* TMEM word address where texture will be loaded (8-byte word) */ + u16 sid; /* Status ID (multiple of 4: either 0, 4, 8, or 12) */ + u16 tline; /* Texture line width (specified by GS_TB_TLINE) */ + u32 flag; /* Status flag */ + u32 mask; /* Status mask */ +}; /* 24 bytes */ + +struct uObjTxtrTile +{ + u32 type; /* Structure identifier 
(G_OBJLT_TXTRTILE) */ + u32 image; /* Texture source address in DRAM (8-byte alignment) */ + u16 twidth; /* Texture width (specified by GS_TT_TWIDTH) */ + u16 tmem; /* TMEM word address where texture will be loaded (8-byte word) */ + u16 sid; /* Status ID (multiple of 4: either 0, 4, 8, or 12) */ + u16 theight;/* Texture height (specified by GS_TT_THEIGHT) */ + u32 flag; /* Status flag */ + u32 mask; /* Status mask */ +}; /* 24 bytes */ + +struct uObjTxtrTLUT +{ + u32 type; /* Structure identifier (G_OBJLT_TLUT) */ + u32 image; /* Texture source address in DRAM */ + u16 pnum; /* Number of palettes to load - 1 */ + u16 phead; /* Palette position at start of load (256~511) */ + u16 sid; /* Status ID (multiple of 4: either 0, 4, 8, or 12) */ + u16 zero; /* Always assign 0 */ + u32 flag; /* Status flag */ + u32 mask; /* Status mask */ +}; /* 24 bytes */ + +typedef union +{ + uObjTxtrBlock block; + uObjTxtrTile tile; + uObjTxtrTLUT tlut; +} uObjTxtr; + +struct uObjTxSprite +{ + uObjTxtr txtr; + uObjSprite sprite; +}; + +struct uObjMtx +{ + s32 A, B, C, D; /* s15.16 */ + s16 Y, X; /* s10.2 */ + u16 BaseScaleY; /* u5.10 */ + u16 BaseScaleX; /* u5.10 */ +}; + +void S2DEX_BG_1Cyc( u32 w0, u32 w1 ); +void S2DEX_BG_Copy( u32 w0, u32 w1 ); +void S2DEX_Obj_Rectangle( u32 w0, u32 w1 ); +void S2DEX_Obj_Sprite( u32 w0, u32 w1 ); +void S2DEX_Obj_MoveMem( u32 w0, u32 w1 ); +void S2DEX_Select_DL( u32 w0, u32 w1 ); +void S2DEX_Obj_RenderMode( u32 w0, u32 w1 ); +void S2DEX_Obj_Rectangle_R( u32 w0, u32 w1 ); +void S2DEX_Obj_LoadTxtr( u32 w0, u32 w1 ); +void S2DEX_Obj_LdTx_Sprite( u32 w0, u32 w1 ); +void S2DEX_Obj_LdTx_Rect( u32 w0, u32 w1 ); +void S2DEX_Obj_LdTx_Rect_R( u32 w0, u32 w1 ); +void S2DEX_Init(); + +#define S2DEX_BG_1CYC 0x01 +#define S2DEX_BG_COPY 0x02 +#define S2DEX_OBJ_RECTANGLE 0x03 +#define S2DEX_OBJ_SPRITE 0x04 +#define S2DEX_OBJ_MOVEMEM 0x05 +#define S2DEX_LOAD_UCODE 0xAF +#define S2DEX_SELECT_DL 0xB0 +#define S2DEX_OBJ_RENDERMODE 0xB1 +#define S2DEX_OBJ_RECTANGLE_R 
0xB2 +#define S2DEX_OBJ_LOADTXTR 0xC1 +#define S2DEX_OBJ_LDTX_SPRITE 0xC2 +#define S2DEX_OBJ_LDTX_RECT 0xC3 +#define S2DEX_OBJ_LDTX_RECT_R 0xC4 +#define S2DEX_RDPHALF_0 0xE4 + +#endif + diff --git a/source/gles2n64/src/S2DEX2.cpp b/source/gles2n64/src/S2DEX2.cpp new file mode 100644 index 0000000..18f0850 --- /dev/null +++ b/source/gles2n64/src/S2DEX2.cpp @@ -0,0 +1,45 @@ +#include "OpenGL.h" +#include "S2DEX.h" +#include "S2DEX2.h" +#include "F3D.h" +#include "F3DEX.h" +#include "F3DEX2.h" +#include "GBI.h" +#include "gSP.h" +#include "gDP.h" +#include "RSP.h" +#include "Types.h" + +void S2DEX2_Init() +{ + // Set GeometryMode flags + GBI_InitFlags( F3DEX2 ); + + gSP.geometryMode = 0; + + GBI.PCStackSize = 18; + + // GBI Command Command Value Command Function + GBI_SetGBI( G_SPNOOP, F3DEX2_SPNOOP, F3D_SPNoOp ); + GBI_SetGBI( G_BG_1CYC, S2DEX2_BG_1CYC, S2DEX_BG_1Cyc ); + GBI_SetGBI( G_BG_COPY, S2DEX2_BG_COPY, S2DEX_BG_Copy ); + GBI_SetGBI( G_OBJ_RECTANGLE, S2DEX2_OBJ_RECTANGLE, S2DEX_Obj_Rectangle ); + GBI_SetGBI( G_OBJ_SPRITE, S2DEX2_OBJ_SPRITE, S2DEX_Obj_Sprite ); + GBI_SetGBI( G_OBJ_MOVEMEM, S2DEX2_OBJ_MOVEMEM, S2DEX_Obj_MoveMem ); + GBI_SetGBI( G_DL, F3DEX2_DL, F3D_DList ); + GBI_SetGBI( G_SELECT_DL, S2DEX2_SELECT_DL, S2DEX_Select_DL ); + GBI_SetGBI( G_OBJ_RENDERMODE, S2DEX2_OBJ_RENDERMODE, S2DEX_Obj_RenderMode ); + GBI_SetGBI( G_OBJ_RECTANGLE_R, S2DEX2_OBJ_RECTANGLE_R, S2DEX_Obj_Rectangle_R ); + GBI_SetGBI( G_OBJ_LOADTXTR, S2DEX2_OBJ_LOADTXTR, S2DEX_Obj_LoadTxtr ); + GBI_SetGBI( G_OBJ_LDTX_SPRITE, S2DEX2_OBJ_LDTX_SPRITE, S2DEX_Obj_LdTx_Sprite ); + GBI_SetGBI( G_OBJ_LDTX_RECT, S2DEX2_OBJ_LDTX_RECT, S2DEX_Obj_LdTx_Rect ); + GBI_SetGBI( G_OBJ_LDTX_RECT_R, S2DEX2_OBJ_LDTX_RECT_R, S2DEX_Obj_LdTx_Rect_R ); + GBI_SetGBI( G_MOVEWORD, F3DEX2_MOVEWORD, F3DEX2_MoveWord ); + GBI_SetGBI( G_SETOTHERMODE_H, F3DEX2_SETOTHERMODE_H, F3DEX2_SetOtherMode_H ); + GBI_SetGBI( G_SETOTHERMODE_L, F3DEX2_SETOTHERMODE_L, F3DEX2_SetOtherMode_L ); + GBI_SetGBI( G_ENDDL, F3DEX2_ENDDL, 
F3D_EndDL ); + GBI_SetGBI( G_RDPHALF_1, F3DEX2_RDPHALF_1, F3D_RDPHalf_1 ); + GBI_SetGBI( G_RDPHALF_2, F3DEX2_RDPHALF_2, F3D_RDPHalf_2 ); + GBI_SetGBI( G_LOAD_UCODE, F3DEX2_LOAD_UCODE, F3DEX_Load_uCode ); +} + diff --git a/source/gles2n64/src/S2DEX2.h b/source/gles2n64/src/S2DEX2.h new file mode 100644 index 0000000..cbe98d7 --- /dev/null +++ b/source/gles2n64/src/S2DEX2.h @@ -0,0 +1,20 @@ +#ifndef S2DEX2_H +#define S2DEX2_H + +void S2DEX2_Init(); + +#define S2DEX2_OBJ_RECTANGLE_R 0xDA +#define S2DEX2_OBJ_MOVEMEM 0xDC +#define S2DEX2_RDPHALF_0 0xE4 +#define S2DEX2_OBJ_RECTANGLE 0x01 +#define S2DEX2_OBJ_SPRITE 0x02 +#define S2DEX2_SELECT_DL 0x04 +#define S2DEX2_OBJ_LOADTXTR 0x05 +#define S2DEX2_OBJ_LDTX_SPRITE 0x06 +#define S2DEX2_OBJ_LDTX_RECT 0x07 +#define S2DEX2_OBJ_LDTX_RECT_R 0x08 +#define S2DEX2_BG_1CYC 0x09 +#define S2DEX2_BG_COPY 0x0A +#define S2DEX2_OBJ_RENDERMODE 0x0B +#endif + diff --git a/source/gles2n64/src/ShaderCombiner.cpp b/source/gles2n64/src/ShaderCombiner.cpp new file mode 100755 index 0000000..86ffa51 --- /dev/null +++ b/source/gles2n64/src/ShaderCombiner.cpp @@ -0,0 +1,844 @@ + +#include +#include "OpenGL.h" +#include "ShaderCombiner.h" +#include "Common.h" +#include "Textures.h" +#include "Config.h" + + +//(sa - sb) * m + a +static const u32 saRGBExpanded[] = +{ + COMBINED, TEXEL0, TEXEL1, PRIMITIVE, + SHADE, ENVIRONMENT, ONE, NOISE, + ZERO, ZERO, ZERO, ZERO, + ZERO, ZERO, ZERO, ZERO +}; + +static const u32 sbRGBExpanded[] = +{ + COMBINED, TEXEL0, TEXEL1, PRIMITIVE, + SHADE, ENVIRONMENT, CENTER, K4, + ZERO, ZERO, ZERO, ZERO, + ZERO, ZERO, ZERO, ZERO +}; + +static const u32 mRGBExpanded[] = +{ + COMBINED, TEXEL0, TEXEL1, PRIMITIVE, + SHADE, ENVIRONMENT, SCALE, COMBINED_ALPHA, + TEXEL0_ALPHA, TEXEL1_ALPHA, PRIMITIVE_ALPHA, SHADE_ALPHA, + ENV_ALPHA, LOD_FRACTION, PRIM_LOD_FRAC, K5, + ZERO, ZERO, ZERO, ZERO, + ZERO, ZERO, ZERO, ZERO, + ZERO, ZERO, ZERO, ZERO, + ZERO, ZERO, ZERO, ZERO +}; + +static const u32 aRGBExpanded[] = +{ + COMBINED, TEXEL0, 
TEXEL1, PRIMITIVE, + SHADE, ENVIRONMENT, ONE, ZERO +}; + +static const u32 saAExpanded[] = +{ + COMBINED, TEXEL0_ALPHA, TEXEL1_ALPHA, PRIMITIVE_ALPHA, + SHADE_ALPHA, ENV_ALPHA, ONE, ZERO +}; + +static const u32 sbAExpanded[] = +{ + COMBINED, TEXEL0_ALPHA, TEXEL1_ALPHA, PRIMITIVE_ALPHA, + SHADE_ALPHA, ENV_ALPHA, ONE, ZERO +}; + +static const u32 mAExpanded[] = +{ + LOD_FRACTION, TEXEL0_ALPHA, TEXEL1_ALPHA, PRIMITIVE_ALPHA, + SHADE_ALPHA, ENV_ALPHA, PRIM_LOD_FRAC, ZERO, +}; + +static const u32 aAExpanded[] = +{ + COMBINED, TEXEL0_ALPHA, TEXEL1_ALPHA, PRIMITIVE_ALPHA, + SHADE_ALPHA, ENV_ALPHA, ONE, ZERO +}; + +int CCEncodeA[] = {0, 1, 2, 3, 4, 5, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 7, 15, 15, 6, 15 }; +int CCEncodeB[] = {0, 1, 2, 3, 4, 5, 6, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 7, 15, 15, 15 }; +int CCEncodeC[] = {0, 1, 2, 3, 4, 5, 31, 6, 7, 8, 9, 10, 11, 12, 13, 14, 31, 31, 15, 31, 31}; +int CCEncodeD[] = {0, 1, 2, 3, 4, 5, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 6, 15}; +int ACEncodeA[] = {7, 7, 7, 7, 7, 7, 7, 7, 0, 1, 2, 3, 4, 5, 7, 7, 7, 7, 7, 6, 7}; +int ACEncodeB[] = {7, 7, 7, 7, 7, 7, 7, 7, 0, 1, 2, 3, 4, 5, 7, 7, 7, 7, 7, 6, 7}; +int ACEncodeC[] = {7, 7, 7, 7, 7, 7, 7, 7, 0, 1, 2, 3, 4, 5, 7, 6, 7, 7, 7, 7, 7}; +int ACEncodeD[] = {7, 7, 7, 7, 7, 7, 7, 7, 0, 1, 2, 3, 4, 5, 7, 7, 7, 7, 7, 6, 7}; + +ShaderProgram *scProgramRoot = NULL; +ShaderProgram *scProgramCurrent = NULL; +int scProgramChanged = 0; +int scProgramCount = 0; + +GLint _vertex_shader = 0; + +const char *_frag_header = " \n"\ +"uniform sampler2D uTex0; \n"\ +"uniform sampler2D uTex1; \n"\ +"uniform sampler2D uNoise; \n"\ +"uniform lowp vec4 uEnvColor; \n"\ +"uniform lowp vec4 uPrimColor; \n"\ +"uniform lowp vec4 uFogColor; \n"\ +"uniform highp float uAlphaRef; \n"\ +"uniform lowp float uPrimLODFrac; \n"\ +"uniform lowp float uK4; \n"\ +"uniform lowp float uK5; \n"\ +" \n"\ +"varying lowp float vFactor; \n"\ +"varying lowp vec4 vShadeColor; \n"\ +"varying mediump vec2 vTexCoord0; 
\n"\ +"varying mediump vec2 vTexCoord1; \n"\ +" \n"\ +"void main() \n"\ +"{ \n"\ +"lowp vec4 lFragColor; \n"; + + +const char *_vert = " \n"\ +"attribute highp vec4 aPosition; \n"\ +"attribute lowp vec4 aColor; \n"\ +"attribute highp vec2 aTexCoord0; \n"\ +"attribute highp vec2 aTexCoord1; \n"\ +" \n"\ +"uniform bool uEnableFog; \n"\ +"uniform float uFogMultiplier, uFogOffset; \n"\ +"uniform float uRenderState; \n"\ +" \n"\ +"uniform mediump vec2 uTexScale; \n"\ +"uniform mediump vec2 uTexOffset[2]; \n"\ +"uniform mediump vec2 uCacheShiftScale[2]; \n"\ +"uniform mediump vec2 uCacheScale[2]; \n"\ +"uniform mediump vec2 uCacheOffset[2]; \n"\ +" \n"\ +"varying lowp float vFactor; \n"\ +"varying lowp vec4 vShadeColor; \n"\ +"varying mediump vec2 vTexCoord0; \n"\ +"varying mediump vec2 vTexCoord1; \n"\ +" \n"\ +"void main() \n"\ +"{ \n"\ +"gl_Position = aPosition; \n"\ +"vShadeColor = aColor; \n"\ +" \n"\ +"if (uRenderState == 1.0) \n"\ +"{ \n"\ +"vTexCoord0 = (aTexCoord0 * (uTexScale[0] * \n"\ +" uCacheShiftScale[0]) + (uCacheOffset[0] - \n"\ +" uTexOffset[0])) * uCacheScale[0]; \n"\ +"vTexCoord1 = (aTexCoord0 * (uTexScale[1] * \n"\ +" uCacheShiftScale[1]) + (uCacheOffset[1] - \n"\ +" uTexOffset[1])) * uCacheScale[1]; \n"\ +"} \n"\ +"else \n"\ +"{ \n"\ +"vTexCoord0 = aTexCoord0; \n"\ +"vTexCoord1 = aTexCoord1; \n"\ +"} \n"\ +" \n"; + +const char * _vertfog = " \n"\ +"if (uEnableFog) \n"\ +"{ \n"\ +"vFactor = max(-1.0, aPosition.z / aPosition.w) \n"\ +" * uFogMultiplier + uFogOffset; \n"\ +"vFactor = clamp(vFactor, 0.0, 1.0); \n"\ +"} \n"; + +const char * _vertzhack = " \n"\ +"if (uRenderState == 1.0) \n"\ +"{ \n"\ +"gl_Position.z = (gl_Position.z + gl_Position.w*9.0) * 0.1; \n"\ +"} \n"; + + +const char * _color_param_str(int param) +{ + switch(param) + { + case COMBINED: return "lFragColor.rgb"; + case TEXEL0: return "lTex0.rgb"; + case TEXEL1: return "lTex1.rgb"; + case PRIMITIVE: return "uPrimColor.rgb"; + case SHADE: return "vShadeColor.rgb"; + case ENVIRONMENT: 
return "uEnvColor.rgb"; + case CENTER: return "vec3(0.0)"; + case SCALE: return "vec3(0.0)"; + case COMBINED_ALPHA: return "vec3(lFragColor.a)"; + case TEXEL0_ALPHA: return "vec3(lTex0.a)"; + case TEXEL1_ALPHA: return "vec3(lTex1.a)"; + case PRIMITIVE_ALPHA: return "vec3(uPrimColor.a)"; + case SHADE_ALPHA: return "vec3(vShadeColor.a)"; + case ENV_ALPHA: return "vec3(uEnvColor.a)"; + case LOD_FRACTION: return "vec3(0.0)"; + case PRIM_LOD_FRAC: return "vec3(uPrimLODFrac)"; + case NOISE: return "lNoise.rgb"; + case K4: return "vec3(uK4)"; + case K5: return "vec3(uK5)"; + case ONE: return "vec3(1.0)"; + case ZERO: return "vec3(0.0)"; + default: + return "vec3(0.0)"; + } +} + +const char * _alpha_param_str(int param) +{ + switch(param) + { + case COMBINED: return "lFragColor.a"; + case TEXEL0: return "lTex0.a"; + case TEXEL1: return "lTex1.a"; + case PRIMITIVE: return "uPrimColor.a"; + case SHADE: return "vShadeColor.a"; + case ENVIRONMENT: return "uEnvColor.a"; + case CENTER: return "0.0"; + case SCALE: return "0.0"; + case COMBINED_ALPHA: return "lFragColor.a"; + case TEXEL0_ALPHA: return "lTex0.a"; + case TEXEL1_ALPHA: return "lTex1.a"; + case PRIMITIVE_ALPHA: return "uPrimColor.a"; + case SHADE_ALPHA: return "vShadeColor.a"; + case ENV_ALPHA: return "uEnvColor.a"; + case LOD_FRACTION: return "0.0"; + case PRIM_LOD_FRAC: return "uPrimLODFrac"; + case NOISE: return "lNoise.a"; + case K4: return "uK4"; + case K5: return "uK5"; + case ONE: return "1.0"; + case ZERO: return "0.0"; + default: + return "0.0"; + } +} + +#define MAX_CACHE 16 +ShaderProgram* prog_cache[MAX_CACHE]; +u64 mux_cache[MAX_CACHE]; +int flag_cache[MAX_CACHE]; +int old_cache[MAX_CACHE]; +static int cache_turn=0; + +DecodedMux::DecodedMux(u64 mux, bool cycle2) +{ + combine.mux = mux; + flags = 0; + + //set to ZERO. 
+ for(int i=0;i<4;i++) + for(int j=0; j< 4; j++) + decode[i][j] = ZERO; + + //rgb cycle 0 + decode[0][0] = saRGBExpanded[combine.saRGB0]; + decode[0][1] = sbRGBExpanded[combine.sbRGB0]; + decode[0][2] = mRGBExpanded[combine.mRGB0]; + decode[0][3] = aRGBExpanded[combine.aRGB0]; + decode[1][0] = saAExpanded[combine.saA0]; + decode[1][1] = sbAExpanded[combine.sbA0]; + decode[1][2] = mAExpanded[combine.mA0]; + decode[1][3] = aAExpanded[combine.aA0]; + if (cycle2) + { + //rgb cycle 1 + decode[2][0] = saRGBExpanded[combine.saRGB1]; + decode[2][1] = sbRGBExpanded[combine.sbRGB1]; + decode[2][2] = mRGBExpanded[combine.mRGB1]; + decode[2][3] = aRGBExpanded[combine.aRGB1]; + decode[3][0] = saAExpanded[combine.saA1]; + decode[3][1] = sbAExpanded[combine.sbA1]; + decode[3][2] = mAExpanded[combine.mA1]; + decode[3][3] = aAExpanded[combine.aA1]; + + //texel 0/1 are swapped in 2nd cycle. + swap(1, TEXEL0, TEXEL1); + swap(1, TEXEL0_ALPHA, TEXEL1_ALPHA); + } + + //simplifying mux: + if (replace(G_CYC_1CYCLE, LOD_FRACTION, ZERO) || replace(G_CYC_2CYCLE, LOD_FRACTION, ZERO)) + LOG(LOG_VERBOSE, "SC Replacing LOD_FRACTION with ZERO\n"); +#if 1 + if (replace(G_CYC_1CYCLE, K4, ZERO) || replace(G_CYC_2CYCLE, K4, ZERO)) + LOG(LOG_VERBOSE, "SC Replacing K4 with ZERO\n"); + + if (replace(G_CYC_1CYCLE, K5, ZERO) || replace(G_CYC_2CYCLE, K5, ZERO)) + LOG(LOG_VERBOSE, "SC Replacing K5 with ZERO\n"); +#endif + + if (replace(G_CYC_1CYCLE, CENTER, ZERO) || replace(G_CYC_2CYCLE, CENTER, ZERO)) + LOG(LOG_VERBOSE, "SC Replacing CENTER with ZERO\n"); + + if (replace(G_CYC_1CYCLE, SCALE, ZERO) || replace(G_CYC_2CYCLE, SCALE, ZERO)) + LOG(LOG_VERBOSE, "SC Replacing SCALE with ZERO\n"); + + //Combiner has initial value of zero in cycle 0 + if (replace(G_CYC_1CYCLE, COMBINED, ZERO)) + LOG(LOG_VERBOSE, "SC Setting CYCLE1 COMBINED to ZERO\n"); + + if (replace(G_CYC_1CYCLE, COMBINED_ALPHA, ZERO)) + LOG(LOG_VERBOSE, "SC Setting CYCLE1 COMBINED_ALPHA to ZERO\n"); + + if (!config.enableNoise) + { + if 
(replace(G_CYC_1CYCLE, NOISE, ZERO)) + LOG(LOG_VERBOSE, "SC Setting CYCLE1 NOISE to ZERO\n"); + + if (replace(G_CYC_2CYCLE, NOISE, ZERO)) + LOG(LOG_VERBOSE, "SC Setting CYCLE2 NOISE to ZERO\n"); + + } + + //mutiplying by zero: (A-B)*0 + C = C + for(int i=0 ; i<4; i++) + { + if (decode[i][2] == ZERO) + { + decode[i][0] = ZERO; + decode[i][1] = ZERO; + } + } + + //(A1-B1)*C1 + D1 + //(A2-B2)*C2 + D2 + //1. ((A1-B1)*C1 + D1 - B2)*C2 + D2 = A1*C1*C2 - B1*C1*C2 + D1*C2 - B2*C2 + D2 + //2. (A2 - (A1-B1)*C1 - D1)*C2 + D2 = A2*C2 - A1*C1*C2 + B1*C1*C2 - D1*C2 + D2 + //3. (A2 - B2)*((A1-B1)*C1 + D1) + D2 = A2*A1*C1 - A2*B1*C1 + A2*D1 - B2*A1*C1 + B2*B1*C1 - B2*D1 + D2 + //4. (A2-B2)*C2 + (A1-B1)*C1 + D1 = A2*C2 - B2*C2 + A1*C1 - B1*C1 + D1 + + if (cycle2) + { + + if (!find(2, COMBINED)) + flags |= SC_IGNORE_RGB0; + + if (!(find(2, COMBINED_ALPHA) || find(3, COMBINED_ALPHA) || find(3, COMBINED))) + flags |= SC_IGNORE_ALPHA0; + + if (decode[2][0] == ZERO && decode[2][1] == ZERO && decode[2][2] == ZERO && decode[2][3] == COMBINED) + { + flags |= SC_IGNORE_RGB1; + } + + if (decode[3][0] == ZERO && decode[3][1] == ZERO && decode[3][2] == ZERO && + (decode[3][3] == COMBINED_ALPHA || decode[3][3] == COMBINED)) + { + flags |= SC_IGNORE_ALPHA1; + } + + } +} + +bool DecodedMux::find(int index, int src) +{ + for(int j=0;j<4;j++) + { + if (decode[index][j] == src) return true; + } + return false; +} + +bool DecodedMux::replace(int cycle, int src, int dest) +{ + int r = false; + for(int i=0;i<2;i++) + { + int ii = (cycle == 0) ? i : (2+i); + for(int j=0;j<4;j++) + { + if (decode[ii][j] == src) {decode[ii][j] = dest; r=true;} + } + } + return r; +} + +bool DecodedMux::swap(int cycle, int src0, int src1) +{ + int r = false; + for(int i=0;i<2;i++) + { + int ii = (cycle == 0) ? 
i : (2+i); + for(int j=0;j<4;j++) + { + if (decode[ii][j] == src0) {decode[ii][j] = src1; r=true;} + else if (decode[ii][j] == src1) {decode[ii][j] = src0; r=true;} + } + } + return r; +} + +void DecodedMux::hack() +{ + if (config.hackZelda) + { + if(combine.mux == 0xfffd923800ffadffLL) + { + replace(G_CYC_1CYCLE, TEXEL1, TEXEL0); + replace(G_CYC_2CYCLE, TEXEL1, TEXEL0); + } + else if (combine.mux == 0xff5bfff800121603LL) + { + replace(G_CYC_1CYCLE, TEXEL1, ZERO); + replace(G_CYC_2CYCLE, TEXEL1, ZERO); + } + } + +} + + +int _program_compare(ShaderProgram *prog, DecodedMux *dmux, u32 flags) +{ + if (prog) + return ((prog->combine.mux == dmux->combine.mux) && (prog->flags == flags)); + else + return 1; +} + +void _glcompiler_error(GLint shader) +{ + int len, i; + char* log; + + glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &len); + log = (char*) malloc(len + 1); + glGetShaderInfoLog(shader, len, &i, log); + log[len] = 0; + LOG(LOG_ERROR, "COMPILE ERROR: %s \n", log); + free(log); +} + +void _gllinker_error(GLint program) +{ + int len, i; + char* log; + + glGetProgramiv(program, GL_INFO_LOG_LENGTH, &len); + log = (char*) malloc(len + 1); + glGetProgramInfoLog(program, len, &i, log); + log[len] = 0; + LOG(LOG_ERROR, "LINK ERROR: %s \n", log); + free(log); +}; + +void _locate_attributes(ShaderProgram *p) +{ + glBindAttribLocation(p->program, SC_POSITION, "aPosition"); + glBindAttribLocation(p->program, SC_COLOR, "aColor"); + glBindAttribLocation(p->program, SC_TEXCOORD0, "aTexCoord0"); + glBindAttribLocation(p->program, SC_TEXCOORD1, "aTexCoord1"); +}; + +#define LocateUniform(A) \ + p->uniforms.A.loc = glGetUniformLocation(p->program, #A); + +void _locate_uniforms(ShaderProgram *p) +{ + LocateUniform(uTex0); + LocateUniform(uTex1); + LocateUniform(uNoise); + LocateUniform(uEnvColor); + LocateUniform(uPrimColor); + LocateUniform(uPrimLODFrac); + LocateUniform(uK4); + LocateUniform(uK5); + LocateUniform(uFogColor); + LocateUniform(uEnableFog); + LocateUniform(uRenderState); 
+ LocateUniform(uFogMultiplier); + LocateUniform(uFogOffset); + LocateUniform(uAlphaRef); + LocateUniform(uTexScale); + LocateUniform(uTexOffset[0]); + LocateUniform(uTexOffset[1]); + LocateUniform(uCacheShiftScale[0]); + LocateUniform(uCacheShiftScale[1]); + LocateUniform(uCacheScale[0]); + LocateUniform(uCacheScale[1]); + LocateUniform(uCacheOffset[0]); + LocateUniform(uCacheOffset[1]); +} + +void _force_uniforms() +{ + SC_ForceUniform1i(uTex0, 0); + SC_ForceUniform1i(uTex1, 1); + SC_ForceUniform1i(uNoise, 2); + SC_ForceUniform4fv(uEnvColor, &gDP.envColor.r); + SC_ForceUniform4fv(uPrimColor, &gDP.primColor.r); + SC_ForceUniform1f(uPrimLODFrac, gDP.primColor.l); + SC_ForceUniform1f(uK4, gDP.convert.k4); + SC_ForceUniform1f(uK5, gDP.convert.k5); + SC_ForceUniform4fv(uFogColor, &gDP.fogColor.r); + SC_ForceUniform1i(uEnableFog, ((config.enableFog==1) && (gSP.geometryMode & G_FOG))); + SC_ForceUniform1f(uRenderState, OGL.renderState); + SC_ForceUniform1f(uFogMultiplier, (float) gSP.fog.multiplier / 255.0f); + SC_ForceUniform1f(uFogOffset, (float) gSP.fog.offset / 255.0f); + SC_ForceUniform1f(uAlphaRef, (gDP.otherMode.cvgXAlpha) ? 
0.5 : gDP.blendColor.a); + SC_ForceUniform2f(uTexScale, gSP.texture.scales, gSP.texture.scalet); + + if (gSP.textureTile[0]){ + SC_ForceUniform2f(uTexOffset[0], gSP.textureTile[0]->fuls, gSP.textureTile[0]->fult); + } else { + SC_ForceUniform2f(uTexOffset[0], 0.0f, 0.0f); + } + + if (gSP.textureTile[1]) + { + SC_ForceUniform2f(uTexOffset[1], gSP.textureTile[1]->fuls, gSP.textureTile[1]->fult); + } + else + { + SC_ForceUniform2f(uTexOffset[1], 0.0f, 0.0f); + } + + if (cache.current[0]) + { + SC_ForceUniform2f(uCacheShiftScale[0], cache.current[0]->shiftScaleS, cache.current[0]->shiftScaleT); + SC_ForceUniform2f(uCacheScale[0], cache.current[0]->scaleS, cache.current[0]->scaleT); + SC_ForceUniform2f(uCacheOffset[0], cache.current[0]->offsetS, cache.current[0]->offsetT); + } + else + { + SC_ForceUniform2f(uCacheShiftScale[0], 1.0f, 1.0f); + SC_ForceUniform2f(uCacheScale[0], 1.0f, 1.0f); + SC_ForceUniform2f(uCacheOffset[0], 0.0f, 0.0f); + } + + if (cache.current[1]) + { + SC_ForceUniform2f(uCacheShiftScale[1], cache.current[1]->shiftScaleS, cache.current[1]->shiftScaleT); + SC_ForceUniform2f(uCacheScale[1], cache.current[1]->scaleS, cache.current[1]->scaleT); + SC_ForceUniform2f(uCacheOffset[1], cache.current[1]->offsetS, cache.current[1]->offsetT); + } + else + { + SC_ForceUniform2f(uCacheShiftScale[1], 1.0f, 1.0f); + SC_ForceUniform2f(uCacheScale[1], 1.0f, 1.0f); + SC_ForceUniform2f(uCacheOffset[1], 0.0f, 0.0f); + } +} + +void _update_uniforms() +{ + SC_SetUniform4fv(uEnvColor, &gDP.envColor.r); + SC_SetUniform4fv(uPrimColor, &gDP.primColor.r); + SC_SetUniform1f(uPrimLODFrac, gDP.primColor.l); + SC_SetUniform4fv(uFogColor, &gDP.fogColor.r); + SC_SetUniform1i(uEnableFog, (config.enableFog && (gSP.geometryMode & G_FOG))); + SC_SetUniform1f(uRenderState, OGL.renderState); + SC_SetUniform1f(uFogMultiplier, (float) gSP.fog.multiplier / 255.0f); + SC_SetUniform1f(uFogOffset, (float) gSP.fog.offset / 255.0f); + SC_SetUniform1f(uAlphaRef, (gDP.otherMode.cvgXAlpha) ? 
0.5 : gDP.blendColor.a); + SC_SetUniform1f(uK4, gDP.convert.k4); + SC_SetUniform1f(uK5, gDP.convert.k5); + + //for some reason i must force these... + SC_ForceUniform2f(uTexScale, gSP.texture.scales, gSP.texture.scalet); + if (scProgramCurrent->usesT0) + { + if (gSP.textureTile[0]) + { + SC_ForceUniform2f(uTexOffset[0], gSP.textureTile[0]->fuls, gSP.textureTile[0]->fult); + } + if (cache.current[0]) + { + SC_ForceUniform2f(uCacheShiftScale[0], cache.current[0]->shiftScaleS, cache.current[0]->shiftScaleT); + SC_ForceUniform2f(uCacheScale[0], cache.current[0]->scaleS, cache.current[0]->scaleT); + SC_ForceUniform2f(uCacheOffset[0], cache.current[0]->offsetS, cache.current[0]->offsetT); + } + } + + if (scProgramCurrent->usesT1) + { + if (gSP.textureTile[1]) + { + SC_ForceUniform2f(uTexOffset[1], gSP.textureTile[1]->fuls, gSP.textureTile[1]->fult); + } + if (cache.current[1]) + { + SC_ForceUniform2f(uCacheShiftScale[1], cache.current[1]->shiftScaleS, cache.current[1]->shiftScaleT); + SC_ForceUniform2f(uCacheScale[1], cache.current[1]->scaleS, cache.current[1]->scaleT); + SC_ForceUniform2f(uCacheOffset[1], cache.current[1]->offsetS, cache.current[1]->offsetT); + } + } +}; + +void ShaderCombiner_Init() +{ + //compile vertex shader: + GLint success; + const char *src[1]; + char buff[4096]; + char *str = buff; + + str += sprintf(str, "%s", _vert); + if (config.enableFog) + { + str += sprintf(str, "%s", _vertfog); + } + if (config.zHack) + { + str += sprintf(str, "%s", _vertzhack); + } + + str += sprintf(str, "}\n\n"); + +#ifdef PRINT_SHADER + LOG(LOG_VERBOSE, "=============================================================\n"); + LOG(LOG_VERBOSE, "Vertex Shader:\n"); + LOG(LOG_VERBOSE, "=============================================================\n"); + LOG(LOG_VERBOSE, "%s", buff); + LOG(LOG_VERBOSE, "=============================================================\n"); +#endif + + src[0] = buff; + _vertex_shader = glCreateShader(GL_VERTEX_SHADER); + 
glShaderSource(_vertex_shader, 1, (const char**) src, NULL); + glCompileShader(_vertex_shader); + glGetShaderiv(_vertex_shader, GL_COMPILE_STATUS, &success); + if (!success) + { + _glcompiler_error(_vertex_shader); + } + + // prepare prog cache + for (int i=0; ileft); + ShaderCombiner_DeletePrograms(prog->right); + glDeleteProgram(prog->program); + //glDeleteShader(prog->fragment); + free(prog); + scProgramCount--; + } +} + +void ShaderCombiner_Destroy() +{ + ShaderCombiner_DeletePrograms(scProgramRoot); + glDeleteShader(_vertex_shader); + scProgramCount = scProgramChanged = 0; + scProgramRoot = scProgramCurrent = NULL; +} + +void ShaderCombiner_Set(u64 mux, int flags) +{ + //banjo tooie hack + if ((gDP.otherMode.cycleType == G_CYC_1CYCLE) && (mux == 0x00ffe7ffffcf9fcfLL)) + { + mux = EncodeCombineMode( 0, 0, 0, 0, TEXEL0, 0, PRIMITIVE, 0, + 0, 0, 0, 0, TEXEL0, 0, PRIMITIVE, 0 ); + } + + //determine flags + if (flags == -1) + { + flags = 0; + if ((config.enableFog) && (gSP.geometryMode & G_FOG)) + flags |= SC_FOGENABLED; + + if (config.enableAlphaTest) + { + if ((gDP.otherMode.alphaCompare == G_AC_THRESHOLD) && !(gDP.otherMode.alphaCvgSel)){ + flags |= SC_ALPHAENABLED; + if (gDP.blendColor.a > 0.0f) flags |= SC_ALPHAGREATER; + } else if (gDP.otherMode.cvgXAlpha){ + flags |= SC_ALPHAENABLED; + flags |= SC_ALPHAGREATER; + } + } + + if (gDP.otherMode.cycleType == G_CYC_2CYCLE) + flags |= SC_2CYCLE; + } + + + DecodedMux dmux(mux, flags&SC_2CYCLE); + dmux.hack(); + + //if already bound: + if (scProgramCurrent) + { + if (_program_compare(scProgramCurrent, &dmux, flags)) + { + scProgramChanged = 0; + return; + } + } + + //traverse binary tree for cached programs + scProgramChanged = 1; + ShaderProgram *root = scProgramRoot; + ShaderProgram *prog = root; + while(!_program_compare(prog, &dmux, flags)) + { + root = prog; + if (prog->combine.mux < dmux.combine.mux) + prog = prog->right; + else + prog = prog->left; + } + + //build new program + if (!prog) + { + 
scProgramCount++; + prog = ShaderCombiner_Compile(&dmux, flags); + if (!root) + scProgramRoot = prog; + else if (root->combine.mux < dmux.combine.mux) + root->right = prog; + else + root->left = prog; + + } + + prog->lastUsed = OGL.frame_dl; + scProgramCurrent = prog; + glUseProgram(prog->program); + _force_uniforms(); +} + +ShaderProgram *ShaderCombiner_Compile(DecodedMux *dmux, int flags) +{ + GLint success; + char frag[4096]; + char *buffer = frag; + ShaderProgram *prog = (ShaderProgram*) malloc(sizeof(ShaderProgram)); + + prog->left = prog->right = NULL; + prog->usesT0 = prog->usesT1 = prog->usesCol = prog->usesNoise = 0; + prog->combine = dmux->combine; + prog->flags = flags; + prog->vertex = _vertex_shader; + + for(int i=0; i < ((flags & SC_2CYCLE) ? 4 : 2); i++) + { + //make sure were not ignoring cycle: + if ((dmux->flags&(1<usesT0 |= (dmux->decode[i][j] == TEXEL0 || dmux->decode[i][j] == TEXEL0_ALPHA); + prog->usesT1 |= (dmux->decode[i][j] == TEXEL1 || dmux->decode[i][j] == TEXEL1_ALPHA); + prog->usesCol |= (dmux->decode[i][j] == SHADE || dmux->decode[i][j] == SHADE_ALPHA); + prog->usesNoise |= (dmux->decode[i][j] == NOISE); + } + } + } + + buffer += sprintf(buffer, "%s", _frag_header); + if (prog->usesT0) + buffer += sprintf(buffer, "lowp vec4 lTex0 = texture2D(uTex0, vTexCoord0); \n"); + if (prog->usesT1) + buffer += sprintf(buffer, "lowp vec4 lTex1 = texture2D(uTex1, vTexCoord1); \n"); + if (prog->usesNoise) + buffer += sprintf(buffer, "lowp vec4 lNoise = texture2D(uNoise, (1.0 / 1024.0) * gl_FragCoord.st); \n"); + + for(int i = 0; i < ((flags & SC_2CYCLE) ? 
2 : 1); i++) + { + if ((dmux->flags&(1<<(i*2))) == 0) + { + buffer += sprintf(buffer, "lFragColor.rgb = (%s - %s) * %s + %s; \n", + _color_param_str(dmux->decode[i*2][0]), + _color_param_str(dmux->decode[i*2][1]), + _color_param_str(dmux->decode[i*2][2]), + _color_param_str(dmux->decode[i*2][3]) + ); + } + + if ((dmux->flags&(1<<(i*2+1))) == 0) + { + buffer += sprintf(buffer, "lFragColor.a = (%s - %s) * %s + %s; \n", + _alpha_param_str(dmux->decode[i*2+1][0]), + _alpha_param_str(dmux->decode[i*2+1][1]), + _alpha_param_str(dmux->decode[i*2+1][2]), + _alpha_param_str(dmux->decode[i*2+1][3]) + ); + } + buffer += sprintf(buffer, "gl_FragColor = lFragColor; \n"); + }; + + //fog + if (flags&SC_FOGENABLED) + { + buffer += sprintf(buffer, "gl_FragColor = mix(gl_FragColor, uFogColor, vFactor); \n"); + } + + //alpha function + if (flags&SC_ALPHAENABLED) + { + if (flags&SC_ALPHAGREATER) + buffer += sprintf(buffer, "if (gl_FragColor.a < uAlphaRef) %s;\n", config.hackAlpha ? "gl_FragColor.a = 0" : "discard"); + else + buffer += sprintf(buffer, "if (gl_FragColor.a <= uAlphaRef) %s;\n", config.hackAlpha ? 
"gl_FragColor.a = 0" : "discard"); + } + buffer += sprintf(buffer, "} \n\n"); + *buffer = 0; + +#ifdef PRINT_SHADER + LOG(LOG_VERBOSE, "=============================================================\n"); + LOG(LOG_VERBOSE, "Combine=0x%llx flags=0x%x dmux flags=0x%x\n", prog->combine.mux, flags, dmux->flags); + LOG(LOG_VERBOSE, "Num=%i \t usesT0=%i usesT1=%i usesCol=%i usesNoise=%i\n", scProgramCount, prog->usesT0, prog->usesT1, prog->usesCol, prog->usesNoise); + LOG(LOG_VERBOSE, "=============================================================\n"); + LOG(LOG_VERBOSE, "%s", frag); + LOG(LOG_VERBOSE, "=============================================================\n"); +#endif + + prog->program = glCreateProgram(); + + //Compile: + char *src[1]; + src[0] = frag; + GLint len[1]; + len[0] = min(4096, strlen(frag)); + prog->fragment = glCreateShader(GL_FRAGMENT_SHADER); + + glShaderSource(prog->fragment, 1, (const char**) src, len); + glCompileShader(prog->fragment); + + + glGetShaderiv(prog->fragment, GL_COMPILE_STATUS, &success); + if (!success) + { + _glcompiler_error(prog->fragment); + } + + //link + _locate_attributes(prog); + glAttachShader(prog->program, prog->fragment); + glAttachShader(prog->program, prog->vertex); + glLinkProgram(prog->program); + glGetProgramiv(prog->program, GL_LINK_STATUS, &success); + if (!success) + { + _gllinker_error(prog->program); + } + + //remove fragment shader: + glDeleteShader(prog->fragment); + + _locate_uniforms(prog); + return prog; +} + diff --git a/source/gles2n64/src/ShaderCombiner.h b/source/gles2n64/src/ShaderCombiner.h new file mode 100644 index 0000000..c036389 --- /dev/null +++ b/source/gles2n64/src/ShaderCombiner.h @@ -0,0 +1,258 @@ + +#ifndef SHADERCOMBINER_H +#define SHADERCOMBINER_H + +#define PRINT_SHADER +#define UNIFORM_CHECK + +#define SC_FOGENABLED 0x1 +#define SC_ALPHAENABLED 0x2 +#define SC_ALPHAGREATER 0x4 +#define SC_2CYCLE 0x8 + +#define SC_POSITION 1 +#define SC_COLOR 2 +#define SC_TEXCOORD0 3 +#define 
SC_TEXCOORD1 4 + +#ifdef UNIFORM_CHECK +#define SC_SetUniform1i(A, B) \ + if (scProgramCurrent->uniforms.A.val != B) \ + {scProgramCurrent->uniforms.A.val = B; glUniform1i(scProgramCurrent->uniforms.A.loc, B);} + +#define SC_SetUniform1f(A, B) \ + if (scProgramCurrent->uniforms.A.val != B) \ + {scProgramCurrent->uniforms.A.val = B; glUniform1f(scProgramCurrent->uniforms.A.loc, B);} + +#define SC_SetUniform4fv(A, B) \ + if ((scProgramCurrent->uniforms.A.val[0] != (B)[0]) || (scProgramCurrent->uniforms.A.val[1] != (B)[1]) || \ + (scProgramCurrent->uniforms.A.val[2] != (B)[2]) || (scProgramCurrent->uniforms.A.val[3] != (B)[3])) \ + {memcpy(scProgramCurrent->uniforms.A.val, B, 16); glUniform4fv(scProgramCurrent->uniforms.A.loc, 1, B);} + +#define SC_SetUniform2f(A, B, C) \ + if ((scProgramCurrent->uniforms.A.val[0] != B) || (scProgramCurrent->uniforms.A.val[1] != C)) \ + {scProgramCurrent->uniforms.A.val[0] = B; scProgramCurrent->uniforms.A.val[1] = C; glUniform2f(scProgramCurrent->uniforms.A.loc, B, C);} + +#define SC_ForceUniform1i(A, B) \ + {scProgramCurrent->uniforms.A.val = B; glUniform1i(scProgramCurrent->uniforms.A.loc, B);} + +#define SC_ForceUniform1f(A, B) \ + {scProgramCurrent->uniforms.A.val = B; glUniform1f(scProgramCurrent->uniforms.A.loc, B);} + +#define SC_ForceUniform4fv(A, B) \ + {memcpy(scProgramCurrent->uniforms.A.val, B, 16); glUniform4fv(scProgramCurrent->uniforms.A.loc, 1, B);} + +#define SC_ForceUniform2f(A, B, C) \ + {scProgramCurrent->uniforms.A.val[0] = B; scProgramCurrent->uniforms.A.val[1] = C; glUniform2f(scProgramCurrent->uniforms.A.loc, B, C);} + +#else +#define SC_SetUniform1i(A, B) glUniform1i(scProgramCurrent->uniforms.A.loc, B) +#define SC_SetUniform1f(A, B) glUniform1f(scProgramCurrent->uniforms.A.loc, B) +#define SC_SetUniform4fv(A, B) glUniform4fv(scProgramCurrent->uniforms.A.loc, 1, B) +#define SC_SetUniform2f(A, B, C) glUniform2f(scProgramCurrent->uniforms.A.loc, B, C) +#define SC_ForceUniform1i(A, B) 
glUniform1i(scProgramCurrent->uniforms.A.loc, B) +#define SC_ForceUniform1f(A, B) glUniform1f(scProgramCurrent->uniforms.A.loc, B) +#define SC_ForceUniform4fv(A, B) glUniform4fv(scProgramCurrent->uniforms.A.loc, 1, B) +#define SC_ForceUniform2f(A, B, C) glUniform2f(scProgramCurrent->uniforms.A.loc, B, C) +#endif + +/* Color combiner constants: */ +#define G_CCMUX_COMBINED 0 +#define G_CCMUX_TEXEL0 1 +#define G_CCMUX_TEXEL1 2 +#define G_CCMUX_PRIMITIVE 3 +#define G_CCMUX_SHADE 4 +#define G_CCMUX_ENVIRONMENT 5 +#define G_CCMUX_CENTER 6 +#define G_CCMUX_SCALE 6 +#define G_CCMUX_COMBINED_ALPHA 7 +#define G_CCMUX_TEXEL0_ALPHA 8 +#define G_CCMUX_TEXEL1_ALPHA 9 +#define G_CCMUX_PRIMITIVE_ALPHA 10 +#define G_CCMUX_SHADE_ALPHA 11 +#define G_CCMUX_ENV_ALPHA 12 +#define G_CCMUX_LOD_FRACTION 13 +#define G_CCMUX_PRIM_LOD_FRAC 14 +#define G_CCMUX_NOISE 7 +#define G_CCMUX_K4 7 +#define G_CCMUX_K5 15 +#define G_CCMUX_1 6 +#define G_CCMUX_0 31 + +/* Alpha combiner constants: */ +#define G_ACMUX_COMBINED 0 +#define G_ACMUX_TEXEL0 1 +#define G_ACMUX_TEXEL1 2 +#define G_ACMUX_PRIMITIVE 3 +#define G_ACMUX_SHADE 4 +#define G_ACMUX_ENVIRONMENT 5 +#define G_ACMUX_LOD_FRACTION 0 +#define G_ACMUX_PRIM_LOD_FRAC 6 +#define G_ACMUX_1 6 +#define G_ACMUX_0 7 + +#define EncodeCombineMode( a0, b0, c0, d0, Aa0, Ab0, Ac0, Ad0, \ + a1, b1, c1, d1, Aa1, Ab1, Ac1, Ad1 ) \ + (u64)(((u64)(_SHIFTL( G_CCMUX_##a0, 20, 4 ) | _SHIFTL( G_CCMUX_##c0, 15, 5 ) | \ + _SHIFTL( G_ACMUX_##Aa0, 12, 3 ) | _SHIFTL( G_ACMUX_##Ac0, 9, 3 ) | \ + _SHIFTL( G_CCMUX_##a1, 5, 4 ) | _SHIFTL( G_CCMUX_##c1, 0, 5 )) << 32) | \ + (u64)(_SHIFTL( G_CCMUX_##b0, 28, 4 ) | _SHIFTL( G_CCMUX_##d0, 15, 3 ) | \ + _SHIFTL( G_ACMUX_##Ab0, 12, 3 ) | _SHIFTL( G_ACMUX_##Ad0, 9, 3 ) | \ + _SHIFTL( G_CCMUX_##b1, 24, 4 ) | _SHIFTL( G_ACMUX_##Aa1, 21, 3 ) | \ + _SHIFTL( G_ACMUX_##Ac1, 18, 3 ) | _SHIFTL( G_CCMUX_##d1, 6, 3 ) | \ + _SHIFTL( G_ACMUX_##Ab1, 3, 3 ) | _SHIFTL( G_ACMUX_##Ad1, 0, 3 ))) + +#define G_CC_PRIMITIVE 0, 0, 0, PRIMITIVE, 0, 0, 0, 
PRIMITIVE +#define G_CC_SHADE 0, 0, 0, SHADE, 0, 0, 0, SHADE +#define G_CC_MODULATEI TEXEL0, 0, SHADE, 0, 0, 0, 0, SHADE +#define G_CC_MODULATEIA TEXEL0, 0, SHADE, 0, TEXEL0, 0, SHADE, 0 +#define G_CC_MODULATEIDECALA TEXEL0, 0, SHADE, 0, 0, 0, 0, TEXEL0 +#define G_CC_MODULATERGB G_CC_MODULATEI +#define G_CC_MODULATERGBA G_CC_MODULATEIA +#define G_CC_MODULATERGBDECALA G_CC_MODULATEIDECALA +#define G_CC_MODULATEI_PRIM TEXEL0, 0, PRIMITIVE, 0, 0, 0, 0, PRIMITIVE +#define G_CC_MODULATEIA_PRIM TEXEL0, 0, PRIMITIVE, 0, TEXEL0, 0, PRIMITIVE, 0 +#define G_CC_MODULATEIDECALA_PRIM TEXEL0, 0, PRIMITIVE, 0, 0, 0, 0, TEXEL0 +#define G_CC_MODULATERGB_PRIM G_CC_MODULATEI_PRIM +#define G_CC_MODULATERGBA_PRIM G_CC_MODULATEIA_PRIM +#define G_CC_MODULATERGBDECALA_PRIM G_CC_MODULATEIDECALA_PRIM +#define G_CC_DECALRGB 0, 0, 0, TEXEL0, 0, 0, 0, SHADE +#define G_CC_DECALRGBA 0, 0, 0, TEXEL0, 0, 0, 0, TEXEL0 +#define G_CC_BLENDI ENVIRONMENT, SHADE, TEXEL0, SHADE, 0, 0, 0, SHADE +#define G_CC_BLENDIA ENVIRONMENT, SHADE, TEXEL0, SHADE, TEXEL0, 0, SHADE, 0 +#define G_CC_BLENDIDECALA ENVIRONMENT, SHADE, TEXEL0, SHADE, 0, 0, 0, TEXEL0 +#define G_CC_BLENDRGBA TEXEL0, SHADE, TEXEL0_ALPHA, SHADE, 0, 0, 0, SHADE +#define G_CC_BLENDRGBDECALA TEXEL0, SHADE, TEXEL0_ALPHA, SHADE, 0, 0, 0, TEXEL0 +#define G_CC_ADDRGB 1, 0, TEXEL0, SHADE, 0, 0, 0, SHADE +#define G_CC_ADDRGBDECALA 1, 0, TEXEL0, SHADE, 0, 0, 0, TEXEL0 +#define G_CC_REFLECTRGB ENVIRONMENT, 0, TEXEL0, SHADE, 0, 0, 0, SHADE +#define G_CC_REFLECTRGBDECALA ENVIRONMENT, 0, TEXEL0, SHADE, 0, 0, 0, TEXEL0 +#define G_CC_HILITERGB PRIMITIVE, SHADE, TEXEL0, SHADE, 0, 0, 0, SHADE +#define G_CC_HILITERGBA PRIMITIVE, SHADE, TEXEL0, SHADE, PRIMITIVE, SHADE, TEXEL0, SHADE +#define G_CC_HILITERGBDECALA PRIMITIVE, SHADE, TEXEL0, SHADE, 0, 0, 0, TEXEL0 +#define G_CC_SHADEDECALA 0, 0, 0, SHADE, 0, 0, 0, TEXEL0 +#define G_CC_BLENDPE PRIMITIVE, ENVIRONMENT, TEXEL0, ENVIRONMENT, TEXEL0, 0, SHADE, 0 +#define G_CC_BLENDPEDECALA PRIMITIVE, ENVIRONMENT, TEXEL0, 
ENVIRONMENT, 0, 0, 0, TEXEL0 +#define _G_CC_BLENDPE ENVIRONMENT, PRIMITIVE, TEXEL0, PRIMITIVE, TEXEL0, 0, SHADE, 0 +#define _G_CC_BLENDPEDECALA ENVIRONMENT, PRIMITIVE, TEXEL0, PRIMITIVE, 0, 0, 0, TEXEL0 +#define _G_CC_TWOCOLORTEX PRIMITIVE, SHADE, TEXEL0, SHADE, 0, 0, 0, SHADE +#define _G_CC_SPARSEST PRIMITIVE, TEXEL0, LOD_FRACTION, TEXEL0, PRIMITIVE, TEXEL0, LOD_FRACTION, TEXEL0 +#define G_CC_TEMPLERP TEXEL1, TEXEL0, PRIM_LOD_FRAC, TEXEL0, TEXEL1, TEXEL0, PRIM_LOD_FRAC, TEXEL0 +#define G_CC_TRILERP TEXEL1, TEXEL0, LOD_FRACTION, TEXEL0, TEXEL1, TEXEL0, LOD_FRACTION, TEXEL0 +#define G_CC_INTERFERENCE TEXEL0, 0, TEXEL1, 0, TEXEL0, 0, TEXEL1, 0 +#define G_CC_1CYUV2RGB TEXEL0, K4, K5, TEXEL0, 0, 0, 0, SHADE +#define G_CC_YUV2RGB TEXEL1, K4, K5, TEXEL1, 0, 0, 0, 0 +#define G_CC_PASS2 0, 0, 0, COMBINED, 0, 0, 0, COMBINED +#define G_CC_MODULATEI2 COMBINED, 0, SHADE, 0, 0, 0, 0, SHADE +#define G_CC_MODULATEIA2 COMBINED, 0, SHADE, 0, COMBINED, 0, SHADE, 0 +#define G_CC_MODULATERGB2 G_CC_MODULATEI2 +#define G_CC_MODULATERGBA2 G_CC_MODULATEIA2 +#define G_CC_MODULATEI_PRIM2 COMBINED, 0, PRIMITIVE, 0, 0, 0, 0, PRIMITIVE +#define G_CC_MODULATEIA_PRIM2 COMBINED, 0, PRIMITIVE, 0, COMBINED, 0, PRIMITIVE, 0 +#define G_CC_MODULATERGB_PRIM2 G_CC_MODULATEI_PRIM2 +#define G_CC_MODULATERGBA_PRIM2 G_CC_MODULATEIA_PRIM2 +#define G_CC_DECALRGB2 0, 0, 0, COMBINED, 0, 0, 0, SHADE +#define G_CC_BLENDI2 ENVIRONMENT, SHADE, COMBINED, SHADE, 0, 0, 0, SHADE +#define G_CC_BLENDIA2 ENVIRONMENT, SHADE, COMBINED, SHADE, COMBINED, 0, SHADE, 0 +#define G_CC_CHROMA_KEY2 TEXEL0, CENTER, SCALE, 0, 0, 0, 0, 0 +#define G_CC_HILITERGB2 ENVIRONMENT, COMBINED, TEXEL0, COMBINED, 0, 0, 0, SHADE +#define G_CC_HILITERGBA2 ENVIRONMENT, COMBINED, TEXEL0, COMBINED, ENVIRONMENT, COMBINED, TEXEL0, COMBINED +#define G_CC_HILITERGBDECALA2 ENVIRONMENT, COMBINED, TEXEL0, COMBINED, 0, 0, 0, TEXEL0 +#define G_CC_HILITERGBPASSA2 ENVIRONMENT, COMBINED, TEXEL0, COMBINED, 0, 0, 0, COMBINED + +// Internal generalized combiner 
inputs +#define COMBINED 0 +#define TEXEL0 1 +#define TEXEL1 2 +#define PRIMITIVE 3 +#define SHADE 4 +#define ENVIRONMENT 5 +#define CENTER 6 +#define SCALE 7 +#define COMBINED_ALPHA 8 +#define TEXEL0_ALPHA 9 +#define TEXEL1_ALPHA 10 +#define PRIMITIVE_ALPHA 11 +#define SHADE_ALPHA 12 +#define ENV_ALPHA 13 +#define LOD_FRACTION 14 +#define PRIM_LOD_FRAC 15 +#define NOISE 16 +#define K4 17 +#define K5 18 +#define ONE 19 +#define ZERO 20 +#define UNKNOWN 21 + + +struct UniformLocation +{ + struct {GLint loc; int val;} uTex0, uTex1, uNoise; + struct {GLint loc; int val;} uEnableFog; + struct {GLint loc; float val;} uFogMultiplier, uFogOffset, uAlphaRef, uPrimLODFrac, uRenderState, uK4, uK5; + struct {GLint loc; float val[4];} uEnvColor, uPrimColor, uFogColor; + struct {GLint loc; float val[2];} uTexScale, uTexOffset[2], uCacheShiftScale[2], + uCacheScale[2], uCacheOffset[2]; +}; + +struct ShaderProgram +{ + GLint program; + GLint fragment; + GLint vertex; + int usesT0; //uses texcoord0 attrib + int usesT1; //uses texcoord1 attrib + int usesCol; //uses color attrib + int usesNoise; //requires noise texture + + UniformLocation uniforms; + gDPCombine combine; + u32 flags; + ShaderProgram *left, *right; + u32 lastUsed; +}; + + +//dmux flags: +#define SC_IGNORE_RGB0 (1<<0) +#define SC_IGNORE_ALPHA0 (1<<1) +#define SC_IGNORE_RGB1 (1<<2) +#define SC_IGNORE_ALPHA1 (1<<3) + +class DecodedMux +{ + public: + DecodedMux(u64 mux, bool cycle2); + + void hack(); + bool find(int index, int src); + bool swap(int cycle, int src0, int src1); + bool replace(int cycle, int src, int dest); + + gDPCombine combine; + int decode[4][4]; + int flags; +}; + +extern int CCEncodeA[]; +extern int CCEncodeB[]; +extern int CCEncodeC[]; +extern int CCEncodeD[]; +extern int ACEncodeA[]; +extern int ACEncodeB[]; +extern int ACEncodeC[]; +extern int ACEncodeD[]; + +extern ShaderProgram *scProgramRoot; +extern ShaderProgram *scProgramCurrent; +extern int scProgramChanged; +extern int scProgramCount; + 
+extern void ShaderCombiner_Init(); +extern void ShaderCombiner_Destroy(); +extern void ShaderCombiner_DeleteProgram(ShaderProgram *prog); +extern void ShaderCombiner_Set(u64 mux, int flags=-1); +extern ShaderProgram *ShaderCombiner_Compile(DecodedMux *dmux, int flags); + +#endif + diff --git a/source/gles2n64/src/Textures.cpp b/source/gles2n64/src/Textures.cpp new file mode 100644 index 0000000..d39f32d --- /dev/null +++ b/source/gles2n64/src/Textures.cpp @@ -0,0 +1,1334 @@ +#include <time.h> +#include <stdlib.h> +#include <memory.h> // NOTE(review): the header names of these three includes were missing; time.h and stdlib.h follow from time()/srand()/rand()/malloc()/free() used below, memory.h is a best guess - confirm against upstream + +#ifndef min +#define min(a,b) ((a) < (b) ? (a) : (b)) +#endif + +#include "Common.h" +#include "Config.h" +#include "OpenGL.h" +#include "Textures.h" +#include "GBI.h" +#include "RSP.h" +#include "gDP.h" +#include "gSP.h" +#include "N64.h" +#include "CRC.h" +#include "convert.h" +#include "2xSAI.h" +//#include "FrameBuffer.h" + +#define FORMAT_NONE 0 +#define FORMAT_I8 1 +#define FORMAT_IA88 2 +#define FORMAT_RGBA4444 3 +#define FORMAT_RGBA5551 4 +#define FORMAT_RGBA8888 5 + +//#define PRINT_TEXTUREFORMAT + +TextureCache cache; + +typedef u32 (*GetTexelFunc)( void *src, u16 x, u16 i, u8 palette ); + +u32 GetNone( void *src, u16 x, u16 i, u8 palette ) +{ + return 0x00000000; +} + +u32 GetCI4IA_RGBA4444( void *src, u16 x, u16 i, u8 palette ) +{ + u8 color4B = ((u8*)src)[(x>>1)^(i<<1)]; + if (x & 1) + return IA88_RGBA4444( *(u16*)&TMEM[256 + (palette << 4) + (color4B & 0x0F)] ); + else + return IA88_RGBA4444( *(u16*)&TMEM[256 + (palette << 4) + (color4B >> 4)] ); +} + +u32 GetCI4IA_RGBA8888( void *src, u16 x, u16 i, u8 palette ) +{ + u8 color4B = ((u8*)src)[(x>>1)^(i<<1)]; + if (x & 1) + return IA88_RGBA8888( *(u16*)&TMEM[256 + (palette << 4) + (color4B & 0x0F)] ); + else + return IA88_RGBA8888( *(u16*)&TMEM[256 + (palette << 4) + (color4B >> 4)] ); +} + +u32 GetCI4RGBA_RGBA5551( void *src, u16 x, u16 i, u8 palette ) +{ + u8 color4B = ((u8*)src)[(x>>1)^(i<<1)]; + if (x & 1) + return RGBA5551_RGBA5551( *(u16*)&TMEM[256 + (palette << 4) + (color4B & 0x0F)] ); + else
+ return RGBA5551_RGBA5551( *(u16*)&TMEM[256 + (palette << 4) + (color4B >> 4)] ); +} + +u32 GetCI4RGBA_RGBA8888( void *src, u16 x, u16 i, u8 palette ) +{ + u8 color4B = ((u8*)src)[(x>>1)^(i<<1)]; + if (x & 1) + return RGBA5551_RGBA8888( *(u16*)&TMEM[256 + (palette << 4) + (color4B & 0x0F)] ); + else + return RGBA5551_RGBA8888( *(u16*)&TMEM[256 + (palette << 4) + (color4B >> 4)] ); +} + +u32 GetIA31_RGBA8888( void *src, u16 x, u16 i, u8 palette ) +{ + u8 color4B = ((u8*)src)[(x>>1)^(i<<1)]; + return IA31_RGBA8888( (x & 1) ? (color4B & 0x0F) : (color4B >> 4) ); +} + +u32 GetIA31_RGBA4444( void *src, u16 x, u16 i, u8 palette ) +{ + u8 color4B = ((u8*)src)[(x>>1)^(i<<1)]; + return IA31_RGBA4444( (x & 1) ? (color4B & 0x0F) : (color4B >> 4) ); +} + +u32 GetIA31_IA88( void *src, u16 x, u16 i, u8 palette ) +{ + u8 color4B = ((u8*)src)[(x>>1)^(i<<1)]; + return IA31_IA88( (x & 1) ? (color4B & 0x0F) : (color4B >> 4) ); +} + +u32 GetI4_RGBA8888( void *src, u16 x, u16 i, u8 palette ) +{ + u8 color4B = ((u8*)src)[(x>>1)^(i<<1)]; + return I4_RGBA8888( (x & 1) ? (color4B & 0x0F) : (color4B >> 4) ); +} + +u32 GetI4_RGBA4444( void *src, u16 x, u16 i, u8 palette ) +{ + u8 color4B = ((u8*)src)[(x>>1)^(i<<1)]; + return I4_RGBA4444( (x & 1) ? (color4B & 0x0F) : (color4B >> 4) ); +} + +u32 GetI4_I8( void *src, u16 x, u16 i, u8 palette ) +{ + u8 color4B = ((u8*)src)[(x>>1)^(i<<1)]; + return I4_I8( (x & 1) ? (color4B & 0x0F) : (color4B >> 4) ); +} + + +u32 GetI4_IA88( void *src, u16 x, u16 i, u8 palette ) +{ + u8 color4B = ((u8*)src)[(x>>1)^(i<<1)]; + return I4_IA88( (x & 1) ? 
(color4B & 0x0F) : (color4B >> 4) ); +} + +u32 GetCI8IA_RGBA4444( void *src, u16 x, u16 i, u8 palette ) +{ + return IA88_RGBA4444( *(u16*)&TMEM[256 + ((u8*)src)[x^(i<<1)]] ); +} + +u32 GetCI8IA_RGBA8888( void *src, u16 x, u16 i, u8 palette ) +{ + return IA88_RGBA8888( *(u16*)&TMEM[256 + ((u8*)src)[x^(i<<1)]] ); +} + +u32 GetCI8RGBA_RGBA5551( void *src, u16 x, u16 i, u8 palette ) +{ + return RGBA5551_RGBA5551( *(u16*)&TMEM[256 + ((u8*)src)[x^(i<<1)]] ); +} + +u32 GetCI8RGBA_RGBA8888( void *src, u16 x, u16 i, u8 palette ) +{ + return RGBA5551_RGBA8888( *(u16*)&TMEM[256 + ((u8*)src)[x^(i<<1)]] ); +} + +u32 GetIA44_RGBA8888( void *src, u16 x, u16 i, u8 palette ) +{ + return IA44_RGBA8888(((u8*)src)[x^(i<<1)]); +} + +u32 GetIA44_RGBA4444( void *src, u16 x, u16 i, u8 palette ) +{ + return IA44_RGBA4444(((u8*)src)[x^(i<<1)]); +} + +u32 GetIA44_IA88( void *src, u16 x, u16 i, u8 palette ) +{ + return IA44_IA88(((u8*)src)[x^(i<<1)]); +} + +u32 GetI8_RGBA8888( void *src, u16 x, u16 i, u8 palette ) +{ + return I8_RGBA8888(((u8*)src)[x^(i<<1)]); +} + +u32 GetI8_I8( void *src, u16 x, u16 i, u8 palette ) +{ + return ((u8*)src)[x^(i<<1)]; +} + +u32 GetI8_IA88( void *src, u16 x, u16 i, u8 palette ) +{ + return I8_IA88(((u8*)src)[x^(i<<1)]); +} + +u32 GetI8_RGBA4444( void *src, u16 x, u16 i, u8 palette ) +{ + return I8_RGBA4444(((u8*)src)[x^(i<<1)]); +} + +u32 GetRGBA5551_RGBA8888( void *src, u16 x, u16 i, u8 palette ) +{ + return RGBA5551_RGBA8888( ((u16*)src)[x^i] ); +} + +u32 GetRGBA5551_RGBA5551( void *src, u16 x, u16 i, u8 palette ) +{ + return RGBA5551_RGBA5551( ((u16*)src)[x^i] ); +} + +u32 GetIA88_RGBA8888( void *src, u16 x, u16 i, u8 palette ) +{ + return IA88_RGBA8888(((u16*)src)[x^i]); +} + +u32 GetIA88_RGBA4444( void *src, u16 x, u16 i, u8 palette ) +{ + return IA88_RGBA4444(((u16*)src)[x^i]); +} + +u32 GetIA88_IA88( void *src, u16 x, u16 i, u8 palette ) +{ + return IA88_IA88(((u16*)src)[x^i]); +} + +u32 GetRGBA8888_RGBA8888( void *src, u16 x, u16 i, u8 palette ) +{ + 
return ((u32*)src)[x^i]; +} + +u32 GetRGBA8888_RGBA4444( void *src, u16 x, u16 i, u8 palette ) +{ + return RGBA8888_RGBA4444(((u32*)src)[x^i]); +} + + +struct TextureFormat +{ + int format; + GetTexelFunc getTexel; + int lineShift, maxTexels; +}; + + +TextureFormat textureFormatIA[4*6] = +{ + // 4-bit + { FORMAT_RGBA5551, GetCI4RGBA_RGBA5551, 4, 4096 }, // RGBA (SELECT) + { FORMAT_NONE, GetNone, 4, 8192 }, // YUV + { FORMAT_RGBA5551, GetCI4RGBA_RGBA5551, 4, 4096 }, // CI + { FORMAT_IA88, GetIA31_IA88, 4, 8192 }, // IA + { FORMAT_IA88, GetI4_IA88, 4, 8192 }, // I + { FORMAT_RGBA8888, GetCI4IA_RGBA8888, 4, 4096 }, // IA Palette + // 8-bit + { FORMAT_RGBA5551, GetCI8RGBA_RGBA5551, 3, 2048 }, // RGBA (SELECT) + { FORMAT_NONE, GetNone, 3, 4096 }, // YUV + { FORMAT_RGBA5551, GetCI8RGBA_RGBA5551, 3, 2048 }, // CI + { FORMAT_IA88, GetIA44_IA88, 3, 4096 }, // IA + { FORMAT_IA88, GetI8_IA88, 3, 4096 }, // I + { FORMAT_RGBA8888, GetCI8IA_RGBA8888, 3, 2048 }, // IA Palette + // 16-bit + { FORMAT_RGBA5551, GetRGBA5551_RGBA5551, 2, 2048 }, // RGBA + { FORMAT_NONE, GetNone, 2, 2048 }, // YUV + { FORMAT_NONE, GetNone, 2, 2048 }, // CI + { FORMAT_IA88, GetIA88_IA88, 2, 2048 }, // IA + { FORMAT_NONE, GetNone, 2, 2048 }, // I + { FORMAT_NONE, GetNone, 2, 2048 }, // IA Palette + // 32-bit + { FORMAT_RGBA8888, GetRGBA8888_RGBA8888, 2, 1024 }, // RGBA + { FORMAT_NONE, GetNone, 2, 1024 }, // YUV + { FORMAT_NONE, GetNone, 2, 1024 }, // CI + { FORMAT_NONE, GetNone, 2, 1024 }, // IA + { FORMAT_NONE, GetNone, 2, 1024 }, // I + { FORMAT_NONE, GetNone, 2, 1024 }, // IA Palette +}; + +TextureFormat textureFormatRGBA[4*6] = +{ + // 4-bit + { FORMAT_RGBA5551, GetCI4RGBA_RGBA5551, 4, 4096 }, // RGBA (SELECT) + { FORMAT_NONE, GetNone, 4, 8192 }, // YUV + { FORMAT_RGBA5551, GetCI4RGBA_RGBA5551, 4, 4096 }, // CI + { FORMAT_RGBA4444, GetIA31_RGBA4444, 4, 8192 }, // IA + { FORMAT_RGBA4444, GetI4_RGBA4444, 4, 8192 }, // I + { FORMAT_RGBA8888, GetCI4IA_RGBA8888, 4, 4096 }, // IA Palette + // 8-bit + { 
FORMAT_RGBA5551, GetCI8RGBA_RGBA5551, 3, 2048 }, // RGBA (SELECT) + { FORMAT_NONE, GetNone, 3, 4096 }, // YUV + { FORMAT_RGBA5551, GetCI8RGBA_RGBA5551, 3, 2048 }, // CI + { FORMAT_RGBA4444, GetIA44_RGBA4444, 3, 4096 }, // IA + { FORMAT_RGBA8888, GetI8_RGBA8888, 3, 4096 }, // I + { FORMAT_RGBA8888, GetCI8IA_RGBA8888, 3, 2048 }, // IA Palette + // 16-bit + { FORMAT_RGBA5551, GetRGBA5551_RGBA5551, 2, 2048 }, // RGBA + { FORMAT_NONE, GetNone, 2, 2048 }, // YUV + { FORMAT_NONE, GetNone, 2, 2048 }, // CI + { FORMAT_RGBA8888, GetIA88_RGBA8888, 2, 2048 }, // IA + { FORMAT_NONE, GetNone, 2, 2048 }, // I + { FORMAT_NONE, GetNone, 2, 2048 }, // IA Palette + // 32-bit + { FORMAT_RGBA8888, GetRGBA8888_RGBA8888, 2, 1024 }, // RGBA + { FORMAT_NONE, GetNone, 2, 1024 }, // YUV + { FORMAT_NONE, GetNone, 2, 1024 }, // CI + { FORMAT_NONE, GetNone, 2, 1024 }, // IA + { FORMAT_NONE, GetNone, 2, 1024 }, // I + { FORMAT_NONE, GetNone, 2, 1024 }, // IA Palette +}; + + +TextureFormat *textureFormat = textureFormatIA; + +void __texture_format_rgba(int size, int format, TextureFormat *texFormat) +{ + if (size < G_IM_SIZ_16b) + { + if (gDP.otherMode.textureLUT == G_TT_NONE) + *texFormat = textureFormat[size*6 + G_IM_FMT_I]; + else if (gDP.otherMode.textureLUT == G_TT_RGBA16) + *texFormat = textureFormat[size*6 + G_IM_FMT_CI]; + else + *texFormat = textureFormat[size*6 + G_IM_FMT_IA]; + } + else + { + *texFormat = textureFormat[size*6 + G_IM_FMT_RGBA]; + } +} + +void __texture_format_ci(int size, int format, TextureFormat *texFormat) +{ + switch(size) + { + case G_IM_SIZ_4b: + if (gDP.otherMode.textureLUT == G_TT_IA16) + *texFormat = textureFormat[G_IM_SIZ_4b*6 + G_IM_FMT_CI_IA]; + else + *texFormat = textureFormat[G_IM_SIZ_4b*6 + G_IM_FMT_CI]; + break; + + case G_IM_SIZ_8b: + if (gDP.otherMode.textureLUT == G_TT_NONE) + *texFormat = textureFormat[G_IM_SIZ_8b*6 + G_IM_FMT_I]; + else if (gDP.otherMode.textureLUT == G_TT_IA16) + *texFormat = textureFormat[G_IM_SIZ_8b*6 + G_IM_FMT_CI_IA]; + else + 
*texFormat = textureFormat[G_IM_SIZ_8b*6 + G_IM_FMT_CI]; + break; + + default: + *texFormat = textureFormat[size*6 + format]; + } +} + +void __texture_format(int size, int format, TextureFormat *texFormat) // selects the converter table entry for a tile size/format under the current textureLUT mode; always writes *texFormat +{ + if (format == G_IM_FMT_RGBA) + { + __texture_format_rgba(size, format, texFormat); + } + else if (format == G_IM_FMT_YUV) + { + *texFormat = textureFormat[size*6 + G_IM_FMT_YUV]; + } + else if (format == G_IM_FMT_CI) + { + __texture_format_ci(size, format, texFormat); + } + else if (format == G_IM_FMT_IA) + { + if (gDP.otherMode.textureLUT != G_TT_NONE) + __texture_format_ci(size, format, texFormat); + else + *texFormat = textureFormat[size*6 + G_IM_FMT_IA]; + } + else if (format == G_IM_FMT_I) + { + if (gDP.otherMode.textureLUT == G_TT_NONE) + *texFormat = textureFormat[size*6 + G_IM_FMT_I]; + else + __texture_format_ci(size, format, texFormat); + } + else *texFormat = textureFormat[size*6 + G_IM_FMT_YUV]; // unrecognised format code: use the FORMAT_NONE/GetNone row so callers never read an unset getTexel pointer +} + +int isTexCacheInit = 0; + +void TextureCache_Init() +{ + u32 dummyTexture[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + + isTexCacheInit = 1; + cache.current[0] = NULL; + cache.current[1] = NULL; + cache.top = NULL; + cache.bottom = NULL; + cache.numCached = 0; + cache.cachedBytes = 0; + +#ifdef __HASHMAP_OPT + cache.hash.init(11); +#endif + + if (config.texture.useIA) textureFormat = textureFormatIA; + else textureFormat = textureFormatRGBA; + + glPixelStorei(GL_PACK_ALIGNMENT, 1); + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + glGenTextures( 32, cache.glNoiseNames ); + + srand(time(NULL)); + u8 noise[64*64*2]; + for (u32 i = 0; i < 32; i++) + { + glBindTexture( GL_TEXTURE_2D, cache.glNoiseNames[i] ); + for (u32 y = 0; y < 64; y++) + { + for (u32 x = 0; x < 64; x++) + { + u32 r = (rand()&0xFF); + noise[y*64*2+x*2] = r; + noise[y*64*2+x*2+1] = r; + } + } + glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE_ALPHA, 64, 64, 0, GL_LUMINANCE_ALPHA, GL_UNSIGNED_BYTE, noise); + } + + cache.dummy = TextureCache_AddTop(); + cache.dummy->address = 0; + cache.dummy->clampS = 1; + cache.dummy->clampT = 1; +
cache.dummy->clampWidth = 4; + cache.dummy->clampHeight = 4; + cache.dummy->crc = 0; + cache.dummy->format = 0; + cache.dummy->size = 0; + cache.dummy->width = 4; + cache.dummy->height = 4; + cache.dummy->realWidth = 0; + cache.dummy->realHeight = 0; + cache.dummy->maskS = 0; + cache.dummy->maskT = 0; + cache.dummy->scaleS = 0.5f; + cache.dummy->scaleT = 0.5f; + cache.dummy->shiftScaleS = 1.0f; + cache.dummy->shiftScaleT = 1.0f; + cache.dummy->textureBytes = 64; + cache.dummy->tMem = 0; + + glBindTexture( GL_TEXTURE_2D, cache.dummy->glName ); + glTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA, 4, 4, 0, GL_RGBA, GL_UNSIGNED_BYTE, dummyTexture); + + cache.cachedBytes = cache.dummy->textureBytes; + TextureCache_ActivateDummy(0); + TextureCache_ActivateDummy(1); + CRC_BuildTable(); +} + +bool TextureCache_Verify() +{ + u16 i = 0; + CachedTexture *current; + + current = cache.top; + + while (current) + { + i++; + current = current->lower; + } + if (i != cache.numCached) return false; + + i = 0; + current = cache.bottom; + while (current) + { + i++; + current = current->higher; + } + if (i != cache.numCached) return false; + + return true; +} + +void TextureCache_RemoveBottom() +{ + CachedTexture *newBottom = cache.bottom->higher; + +#ifdef __HASHMAP_OPT + CachedTexture* tex= cache.hash.find(cache.bottom->crc); + if (tex == cache.bottom) + cache.hash.insert(cache.bottom->crc, NULL); +#endif + + glDeleteTextures( 1, &cache.bottom->glName ); + cache.cachedBytes -= cache.bottom->textureBytes; + + if (cache.bottom == cache.top) + cache.top = NULL; + + free( cache.bottom ); + + cache.bottom = newBottom; + + if (cache.bottom) + cache.bottom->lower = NULL; + + cache.numCached--; +} + +void TextureCache_Remove( CachedTexture *texture ) +{ + if ((texture == cache.bottom) && (texture == cache.top)) + { + cache.top = NULL; + cache.bottom = NULL; + } + else if (texture == cache.bottom) + { + cache.bottom = texture->higher; + + if (cache.bottom) + cache.bottom->lower = NULL; + } + else if 
(texture == cache.top) + { + cache.top = texture->lower; + + if (cache.top) + cache.top->higher = NULL; + } + else + { + texture->higher->lower = texture->lower; + texture->lower->higher = texture->higher; + } + +#ifdef __HASHMAP_OPT + CachedTexture* tex= cache.hash.find(texture->crc); + if (tex == texture) // clear the hash slot only when it still refers to this entry + cache.hash.insert(texture->crc, NULL); +#endif + + glDeleteTextures( 1, &texture->glName ); + cache.cachedBytes -= texture->textureBytes; + free( texture ); + + cache.numCached--; +} + +CachedTexture *TextureCache_AddTop() // evicts entries while over TEXTURECACHE_MAX, then links a new zero-filled entry at the top; returns NULL if allocation fails +{ + while (cache.cachedBytes > TEXTURECACHE_MAX && (cache.bottom != cache.dummy || cache.dummy->higher)) // stop once only the dummy is left, otherwise this loop could never end + { + if (cache.bottom != cache.dummy) + TextureCache_RemoveBottom(); + else if (cache.dummy->higher) + TextureCache_Remove( cache.dummy->higher ); + } + + CachedTexture *newtop = (CachedTexture*)calloc( 1, sizeof( CachedTexture ) ); // zero-filled so fields the caller does not assign are defined + if (!newtop) return NULL; // out of memory: leave the list untouched + glGenTextures( 1, &newtop->glName ); + + newtop->lower = cache.top; + newtop->higher = NULL; + + if (cache.top) + cache.top->higher = newtop; + + if (!cache.bottom) + cache.bottom = newtop; + + cache.top = newtop; + + cache.numCached++; + + return newtop; +} + +void TextureCache_MoveToTop( CachedTexture *newtop ) // unlinks newtop from its current position and relinks it as cache.top +{ + if (newtop == cache.top) return; + + if (newtop == cache.bottom) + { + cache.bottom = newtop->higher; + cache.bottom->lower = NULL; + } + else + { + newtop->higher->lower = newtop->lower; + newtop->lower->higher = newtop->higher; + } + + newtop->higher = NULL; + newtop->lower = cache.top; + cache.top->higher = newtop; + cache.top = newtop; +} + +void TextureCache_Destroy() // releases every cached entry and the 32 noise textures +{ + while (cache.bottom) + TextureCache_RemoveBottom(); + + glDeleteTextures( 32, cache.glNoiseNames ); + cache.dummy = NULL; // the dummy lives in the list, so the loop above already deleted and freed it; reading it again would be a use-after-free + +#ifdef __HASHMAP_OPT + cache.hash.destroy(); +#endif + + cache.top = NULL; + cache.bottom = NULL; +} + + + +void TextureCache_LoadBackground( CachedTexture *texInfo ) +{ + u32 *dest, *scaledDest; + u8 *swapped, *src; + u32 numBytes, bpl; + u32 x, y, j, tx, ty; + u16 clampSClamp, clampTClamp; + + int bytePerPixel=0; +
TextureFormat texFormat; + GetTexelFunc getTexel; + GLint glWidth=0, glHeight=0; + GLenum glType=0; + GLenum glFormat=0; + + __texture_format(texInfo->size, texInfo->format, &texFormat); + +#ifdef PRINT_TEXTUREFORMAT + printf("BG LUT=%i, TEXTURE SIZE=%i, FORMAT=%i -> GL FORMAT=%i\n", gDP.otherMode.textureLUT, texInfo->size, texInfo->format, texFormat.format); fflush(stdout); +#endif + + if (texFormat.format == FORMAT_NONE) + { + LOG(LOG_WARNING, "No Texture Conversion function available, size=%i format=%i\n", texInfo->size, texInfo->format); + } + + switch(texFormat.format) + { + case FORMAT_I8: + glFormat = GL_LUMINANCE; + glType = GL_UNSIGNED_BYTE; + bytePerPixel = 1; + break; + case FORMAT_IA88: + glFormat = GL_LUMINANCE_ALPHA; + glType = GL_UNSIGNED_BYTE; + bytePerPixel = 2; + break; + case FORMAT_RGBA4444: + glFormat = GL_RGBA; + glType = GL_UNSIGNED_SHORT_4_4_4_4; + bytePerPixel = 2; + break; + case FORMAT_RGBA5551: + glFormat = GL_RGBA; + glType = GL_UNSIGNED_SHORT_5_5_5_1; + bytePerPixel = 2; + break; + case FORMAT_RGBA8888: + glFormat = GL_RGBA; + glType = GL_UNSIGNED_BYTE; + bytePerPixel = 4; + break; + } + + glWidth = texInfo->realWidth; + glHeight = texInfo->realHeight; + texInfo->textureBytes = (glWidth * glHeight) * bytePerPixel; + getTexel = texFormat.getTexel; + + bpl = gSP.bgImage.width << gSP.bgImage.size >> 1; + numBytes = bpl * gSP.bgImage.height; + swapped = (u8*) malloc(numBytes); // byte-unswapped copy of the background image taken from RDRAM + dest = (u32*) malloc(texInfo->textureBytes); // converted texels handed to glTexImage2D + + if (!dest || !swapped) + { + LOG(LOG_ERROR, "Malloc failed!\n"); + free(dest); free(swapped); return; // release whichever buffer was obtained; free(NULL) is a no-op + } + + UnswapCopy(&RDRAM[gSP.bgImage.address], swapped, numBytes); + + clampSClamp = texInfo->width - 1; + clampTClamp = texInfo->height - 1; + + j = 0; + for (y = 0; y < texInfo->realHeight; y++) + { + ty = min(y, clampTClamp); + src = &swapped[bpl * ty]; + for (x = 0; x < texInfo->realWidth; x++) + { + tx = min(x, clampSClamp); + if (bytePerPixel == 4) + ((u32*)dest)[j++] = getTexel(src, tx, 0, texInfo->palette); + else if (bytePerPixel ==
2) + ((u16*)dest)[j++] = getTexel(src, tx, 0, texInfo->palette); + else if (bytePerPixel == 1) + ((u8*)dest)[j++] = getTexel(src, tx, 0, texInfo->palette); + } + } + + if (!config.texture.sai2x || (texFormat.format == FORMAT_I8 || texFormat.format == FORMAT_IA88)) + { + glTexImage2D( GL_TEXTURE_2D, 0, glFormat, glWidth, glHeight, 0, glFormat, glType, dest); + } + else + { + LOG(LOG_VERBOSE, "Using 2xSAI Filter on Texture\n"); + texInfo->textureBytes <<= 2; + + scaledDest = (u32*) malloc( texInfo->textureBytes ); + if (!scaledDest) { LOG(LOG_ERROR, "Malloc failed!\n"); free(dest); free(swapped); return; } // the scalers below write through scaledDest unconditionally + if (glType == GL_UNSIGNED_BYTE) + _2xSaI8888( (u32*)dest, (u32*)scaledDest, texInfo->realWidth, texInfo->realHeight, texInfo->clampS, texInfo->clampT ); + else if (glType == GL_UNSIGNED_SHORT_4_4_4_4) // exactly one scaler may run; without the else, 8888 data was also pushed through the 5551 scaler + _2xSaI4444( (u16*)dest, (u16*)scaledDest, texInfo->realWidth, texInfo->realHeight, texInfo->clampS, texInfo->clampT ); + else + _2xSaI5551( (u16*)dest, (u16*)scaledDest, texInfo->realWidth, texInfo->realHeight, texInfo->clampS, texInfo->clampT ); + + glTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA, texInfo->realWidth << 1, texInfo->realHeight << 1, 0, GL_RGBA, glType, scaledDest ); + + free( scaledDest ); + } + + free(dest); + free(swapped); + + + if (config.texture.enableMipmap) + glGenerateMipmap(GL_TEXTURE_2D); +} + +void TextureCache_Load( CachedTexture *texInfo ) +{ + u32 *dest, *scaledDest; + + void *src; + u16 x, y, i, j, tx, ty, line; + u16 mirrorSBit, maskSMask, clampSClamp; + u16 mirrorTBit, maskTMask, clampTClamp; + + int bytePerPixel=0; + TextureFormat texFormat; + GetTexelFunc getTexel; + GLint glWidth=0, glHeight=0; + GLenum glType=0; + GLenum glFormat=0; + + __texture_format(texInfo->size, texInfo->format, &texFormat); + +#ifdef PRINT_TEXTUREFORMAT + printf("TEX LUT=%i, TEXTURE SIZE=%i, FORMAT=%i -> GL FORMAT=%i\n", gDP.otherMode.textureLUT, texInfo->size, texInfo->format, texFormat.format); fflush(stdout); +#endif + + if (texFormat.format == FORMAT_NONE) + { + LOG(LOG_WARNING, "No Texture Conversion function available, size=%i format=%i\n",
texInfo->size, texInfo->format); + } + + switch(texFormat.format) + { + case FORMAT_I8: + glFormat = GL_LUMINANCE; + glType = GL_UNSIGNED_BYTE; + bytePerPixel = 1; + break; + case FORMAT_IA88: + glFormat = GL_LUMINANCE_ALPHA; + glType = GL_UNSIGNED_BYTE; + bytePerPixel = 2; + break; + case FORMAT_RGBA4444: + glFormat = GL_RGBA; + glType = GL_UNSIGNED_SHORT_4_4_4_4; + bytePerPixel = 2; + break; + case FORMAT_RGBA5551: + glFormat = GL_RGBA; + glType = GL_UNSIGNED_SHORT_5_5_5_1; + bytePerPixel = 2; + break; + case FORMAT_RGBA8888: + glFormat = GL_RGBA; + glType = GL_UNSIGNED_BYTE; + bytePerPixel = 4; + break; + } + + glWidth = texInfo->realWidth; + glHeight = texInfo->realHeight; + texInfo->textureBytes = (glWidth * glHeight) * bytePerPixel; + getTexel = texFormat.getTexel; + + dest = (u32*)malloc(texInfo->textureBytes); + + if (!dest) + { + LOG(LOG_ERROR, "Malloc failed!\n"); + return; + } + + + line = texInfo->line; + + if (texInfo->size == G_IM_SIZ_32b) + line <<= 1; + + if (texInfo->maskS) + { + clampSClamp = texInfo->clampS ? texInfo->clampWidth - 1 : (texInfo->mirrorS ? (texInfo->width << 1) - 1 : texInfo->width - 1); + maskSMask = (1 << texInfo->maskS) - 1; + mirrorSBit = texInfo->mirrorS ? (1 << texInfo->maskS) : 0; + } + else + { + clampSClamp = min( texInfo->clampWidth, texInfo->width ) - 1; + maskSMask = 0xFFFF; + mirrorSBit = 0x0000; + } + + if (texInfo->maskT) + { + clampTClamp = texInfo->clampT ? texInfo->clampHeight - 1 : (texInfo->mirrorT ? (texInfo->height << 1) - 1: texInfo->height - 1); + maskTMask = (1 << texInfo->maskT) - 1; + mirrorTBit = texInfo->mirrorT ? 
(1 << texInfo->maskT) : 0; + } + else + { + clampTClamp = min( texInfo->clampHeight, texInfo->height ) - 1; + maskTMask = 0xFFFF; + mirrorTBit = 0x0000; + } + + // Hack for Zelda warp texture + if (((texInfo->tMem << 3) + (texInfo->width * texInfo->height << texInfo->size >> 1)) > 4096) + { + texInfo->tMem = 0; + } + + // limit clamp values to min-0 (Perfect Dark has height=0 textures, making negative clamps) + if (clampTClamp & 0x8000) clampTClamp = 0; + if (clampSClamp & 0x8000) clampSClamp = 0; + + j = 0; + for (y = 0; y < texInfo->realHeight; y++) + { + ty = min(y, clampTClamp) & maskTMask; + if (y & mirrorTBit) ty ^= maskTMask; + src = &TMEM[(texInfo->tMem + line * ty) & 511]; + i = (ty & 1) << 1; + for (x = 0; x < texInfo->realWidth; x++) + { + tx = min(x, clampSClamp) & maskSMask; + + if (x & mirrorSBit) tx ^= maskSMask; + + if (bytePerPixel == 4) + { + ((u32*)dest)[j] = getTexel(src, tx, i, texInfo->palette); + } + else if (bytePerPixel == 2) + { + ((u16*)dest)[j] = getTexel(src, tx, i, texInfo->palette); + } + else if (bytePerPixel == 1) + { + ((u8*)dest)[j] = getTexel(src, tx, i, texInfo->palette); + } + j++; + } + } + + if (!config.texture.sai2x || (texFormat.format == FORMAT_I8) || (texFormat.format == FORMAT_IA88)) + { +#ifdef PRINT_TEXTUREFORMAT + printf("j=%u DEST=0x%x SIZE=%i F=0x%x, W=%i, H=%i, T=0x%x\n", j, dest, texInfo->textureBytes,glFormat, glWidth, glHeight, glType); fflush(stdout); +#endif + glTexImage2D( GL_TEXTURE_2D, 0, glFormat, glWidth, glHeight, 0, glFormat, glType, dest); + } + else + { + LOG(LOG_VERBOSE, "Using 2xSAI Filter on Texture\n"); + + texInfo->textureBytes <<= 2; + + scaledDest = (u32*)malloc( texInfo->textureBytes ); + + if (glType == GL_UNSIGNED_BYTE) + _2xSaI8888( (u32*)dest, (u32*)scaledDest, texInfo->realWidth, texInfo->realHeight, 1, 1 ); + else if (glType == GL_UNSIGNED_SHORT_4_4_4_4) + _2xSaI4444( (u16*)dest, (u16*)scaledDest, texInfo->realWidth, texInfo->realHeight, 1, 1 ); + else + _2xSaI5551( (u16*)dest, 
(u16*)scaledDest, texInfo->realWidth, texInfo->realHeight, 1, 1 ); + + glTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA, texInfo->realWidth << 1, texInfo->realHeight << 1, 0, GL_RGBA, glType, scaledDest ); + + free( scaledDest ); + } + + free(dest); + + if (config.texture.enableMipmap) + glGenerateMipmap(GL_TEXTURE_2D); + +} + +#define max(a,b) ((a) > (b) ? (a) : (b)) + +u32 TextureCache_CalculateCRC( u32 t, u32 width, u32 height ) +{ + u32 crc; + u32 y, /*i,*/ bpl, lineBytes, line; + void *src; + + bpl = width << gSP.textureTile[t]->size >> 1; + lineBytes = gSP.textureTile[t]->line << 3; + + line = gSP.textureTile[t]->line; + if (gSP.textureTile[t]->size == G_IM_SIZ_32b) + line <<= 1; + + crc = 0xFFFFFFFF; + +#ifdef __CRC_OPT + unsigned n = (config.texture.fastCRC) ? max(1, height / 8) : 1; +#else + unsigned n = 1; +#endif + + for (y = 0; y < height; y += n) + { + src = (void*) &TMEM[(gSP.textureTile[t]->tmem + (y * line)) & 511]; + crc = CRC_Calculate( crc, src, bpl ); + } + + if (gSP.textureTile[t]->format == G_IM_FMT_CI) + { + if (gSP.textureTile[t]->size == G_IM_SIZ_4b) + crc = CRC_Calculate( crc, &gDP.paletteCRC16[gSP.textureTile[t]->palette], 4 ); + else if (gSP.textureTile[t]->size == G_IM_SIZ_8b) + crc = CRC_Calculate( crc, &gDP.paletteCRC256, 4 ); + } + return crc; +} + +void TextureCache_ActivateTexture( u32 t, CachedTexture *texture ) +{ + +#ifdef __HASHMAP_OPT + cache.hash.insert(texture->crc, texture); +#endif + + glActiveTexture( GL_TEXTURE0 + t ); + glBindTexture( GL_TEXTURE_2D, texture->glName ); + + // Set filter mode. 
Almost always bilinear, but check anyways + if ((gDP.otherMode.textureFilter == G_TF_BILERP) || (gDP.otherMode.textureFilter == G_TF_AVERAGE) || (config.texture.forceBilinear)) + { + glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR ); + glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR ); + } + else + { + glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST ); + glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST ); + } + + // Set clamping modes + glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, (texture->clampS) ? GL_CLAMP_TO_EDGE : GL_REPEAT ); + glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, (texture->clampT) ? GL_CLAMP_TO_EDGE : GL_REPEAT ); + + if (config.texture.maxAnisotropy > 0) + { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, config.texture.maxAnisotropy); + } + + texture->lastDList = RSP.DList; + TextureCache_MoveToTop( texture ); + cache.current[t] = texture; +} + +void TextureCache_ActivateDummy( u32 t) +{ + glActiveTexture(GL_TEXTURE0 + t); + glBindTexture(GL_TEXTURE_2D, cache.dummy->glName ); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); +} + +int _background_compare(CachedTexture *current, u32 crc) +{ + if ((current != NULL) && + (current->crc == crc) && + (current->width == gSP.bgImage.width) && + (current->height == gSP.bgImage.height) && + (current->format == gSP.bgImage.format) && + (current->size == gSP.bgImage.size)) + return 1; + else + return 0; +} + +void TextureCache_UpdateBackground() +{ + u32 numBytes = gSP.bgImage.width * gSP.bgImage.height << gSP.bgImage.size >> 1; + u32 crc; + + crc = CRC_Calculate( 0xFFFFFFFF, &RDRAM[gSP.bgImage.address], numBytes ); + + if (gSP.bgImage.format == G_IM_FMT_CI) + { + if (gSP.bgImage.size == G_IM_SIZ_4b) + crc = CRC_Calculate( crc, &gDP.paletteCRC16[gSP.bgImage.palette], 4 ); + else if (gSP.bgImage.size == G_IM_SIZ_8b) + crc = 
CRC_Calculate( crc, &gDP.paletteCRC256, 4 ); + } + + //before we traverse cache, check to see if texture is already bound: + if (_background_compare(cache.current[0], crc)) + { + return; + } + +#ifdef __HASHMAP_OPT + CachedTexture *tex = cache.hash.find(crc); + if (tex) + { + if (_background_compare(tex, crc)) + { + TextureCache_ActivateTexture(0, tex); + cache.hits++; + return; + } + } +#endif + + CachedTexture *current = cache.top; + while (current) + { + if (_background_compare(current, crc)) + { + TextureCache_ActivateTexture( 0, current ); + cache.hits++; + return; + } + current = current->lower; + } + cache.misses++; + + glActiveTexture(GL_TEXTURE0); + cache.current[0] = TextureCache_AddTop(); + + glBindTexture( GL_TEXTURE_2D, cache.current[0]->glName ); + cache.current[0]->address = gSP.bgImage.address; + cache.current[0]->crc = crc; + cache.current[0]->format = gSP.bgImage.format; + cache.current[0]->size = gSP.bgImage.size; + cache.current[0]->width = gSP.bgImage.width; + cache.current[0]->height = gSP.bgImage.height; + cache.current[0]->clampWidth = gSP.bgImage.width; + cache.current[0]->clampHeight = gSP.bgImage.height; + cache.current[0]->palette = gSP.bgImage.palette; + cache.current[0]->maskS = 0; + cache.current[0]->maskT = 0; + cache.current[0]->mirrorS = 0; + cache.current[0]->mirrorT = 0; + cache.current[0]->clampS = 1; + cache.current[0]->clampT = 1; + cache.current[0]->line = 0; + cache.current[0]->tMem = 0; + cache.current[0]->lastDList = RSP.DList; + + cache.current[0]->realWidth = (config.texture.pow2) ? pow2(gSP.bgImage.width ) : gSP.bgImage.width; + cache.current[0]->realHeight = (config.texture.pow2) ? 
pow2(gSP.bgImage.height) : gSP.bgImage.height; + + cache.current[0]->scaleS = 1.0f / (f32)(cache.current[0]->realWidth); + cache.current[0]->scaleT = 1.0f / (f32)(cache.current[0]->realHeight); + cache.current[0]->shiftScaleS = 1.0f; + cache.current[0]->shiftScaleT = 1.0f; + + TextureCache_LoadBackground( cache.current[0] ); + TextureCache_ActivateTexture( 0, cache.current[0] ); + + cache.cachedBytes += cache.current[0]->textureBytes; +} + +int _texture_compare(u32 t, CachedTexture *current, u32 crc, u32 width, u32 height, u32 clampWidth, u32 clampHeight) +{ + if ((current != NULL) && + (current->crc == crc) && + (current->width == width) && + (current->height == height) && + (current->clampWidth == clampWidth) && + (current->clampHeight == clampHeight) && + (current->maskS == gSP.textureTile[t]->masks) && + (current->maskT == gSP.textureTile[t]->maskt) && + (current->mirrorS == gSP.textureTile[t]->mirrors) && + (current->mirrorT == gSP.textureTile[t]->mirrort) && + (current->clampS == gSP.textureTile[t]->clamps) && + (current->clampT == gSP.textureTile[t]->clampt) && + (current->format == gSP.textureTile[t]->format) && + (current->size == gSP.textureTile[t]->size)) + return 1; + else + return 0; +} + + +void TextureCache_Update( u32 t ) +{ + CachedTexture *current; + + u32 crc, maxTexels; + u32 tileWidth, maskWidth, loadWidth, lineWidth, clampWidth, height; + u32 tileHeight, maskHeight, loadHeight, lineHeight, clampHeight, width; + + if (gDP.textureMode == TEXTUREMODE_BGIMAGE) + { + TextureCache_UpdateBackground(); + return; + } + + TextureFormat texFormat; + __texture_format(gSP.textureTile[t]->size, gSP.textureTile[t]->format, &texFormat); + + maxTexels = texFormat.maxTexels; + + // Here comes a bunch of code that just calculates the texture size...I wish there was an easier way... 
+ tileWidth = gSP.textureTile[t]->lrs - gSP.textureTile[t]->uls + 1; + tileHeight = gSP.textureTile[t]->lrt - gSP.textureTile[t]->ult + 1; + + maskWidth = 1 << gSP.textureTile[t]->masks; + maskHeight = 1 << gSP.textureTile[t]->maskt; + + loadWidth = gDP.loadTile->lrs - gDP.loadTile->uls + 1; + loadHeight = gDP.loadTile->lrt - gDP.loadTile->ult + 1; + + lineWidth = gSP.textureTile[t]->line << texFormat.lineShift; + + if (lineWidth) // Don't allow division by zero + lineHeight = min( maxTexels / lineWidth, tileHeight ); + else + lineHeight = 0; + + if (gDP.textureMode == TEXTUREMODE_TEXRECT) + { + u32 texRectWidth = gDP.texRect.width - gSP.textureTile[t]->uls; + u32 texRectHeight = gDP.texRect.height - gSP.textureTile[t]->ult; + + if (gSP.textureTile[t]->masks && ((maskWidth * maskHeight) <= maxTexels)) + width = maskWidth; + else if ((tileWidth * tileHeight) <= maxTexels) + width = tileWidth; + else if ((tileWidth * texRectHeight) <= maxTexels) + width = tileWidth; + else if ((texRectWidth * tileHeight) <= maxTexels) + width = gDP.texRect.width; + else if ((texRectWidth * texRectHeight) <= maxTexels) + width = gDP.texRect.width; + else if (gDP.loadType == LOADTYPE_TILE) + width = loadWidth; + else + width = lineWidth; + + if (gSP.textureTile[t]->maskt && ((maskWidth * maskHeight) <= maxTexels)) + height = maskHeight; + else if ((tileWidth * tileHeight) <= maxTexels) + height = tileHeight; + else if ((tileWidth * texRectHeight) <= maxTexels) + height = gDP.texRect.height; + else if ((texRectWidth * tileHeight) <= maxTexels) + height = tileHeight; + else if ((texRectWidth * texRectHeight) <= maxTexels) + height = gDP.texRect.height; + else if (gDP.loadType == LOADTYPE_TILE) + height = loadHeight; + else + height = lineHeight; + } + else + { + if (gSP.textureTile[t]->masks && ((maskWidth * maskHeight) <= maxTexels)) + width = maskWidth; + else if ((tileWidth * tileHeight) <= maxTexels) + width = tileWidth; + else if (gDP.loadType == LOADTYPE_TILE) + width = loadWidth; 
+ else + width = lineWidth; + + if (gSP.textureTile[t]->maskt && ((maskWidth * maskHeight) <= maxTexels)) + height = maskHeight; + else if ((tileWidth * tileHeight) <= maxTexels) + height = tileHeight; + else if (gDP.loadType == LOADTYPE_TILE) + height = loadHeight; + else + height = lineHeight; + } + + clampWidth = gSP.textureTile[t]->clamps ? tileWidth : width; + clampHeight = gSP.textureTile[t]->clampt ? tileHeight : height; + + if (clampWidth > 256) + gSP.textureTile[t]->clamps = 0; + if (clampHeight > 256) + gSP.textureTile[t]->clampt = 0; + + // Make sure masking is valid + if (maskWidth > width) + { + gSP.textureTile[t]->masks = powof( width ); + maskWidth = 1 << gSP.textureTile[t]->masks; + } + + if (maskHeight > height) + { + gSP.textureTile[t]->maskt = powof( height ); + maskHeight = 1 << gSP.textureTile[t]->maskt; + } + + crc = TextureCache_CalculateCRC( t, width, height ); + + //before we traverse cache, check to see if texture is already bound: + if (_texture_compare(t, cache.current[t], crc, width, height, clampWidth, clampHeight)) + { + cache.hits++; + return; + } + +#ifdef __HASHMAP_OPT + CachedTexture *tex = cache.hash.find(crc); + if (tex) + { + if (_texture_compare(t, tex, crc, width, height, clampWidth, clampHeight)) + { + TextureCache_ActivateTexture( t, tex); + cache.hits++; + return; + } + } +#endif + + current = cache.top; + while (current) + { + if (_texture_compare(t, current, crc, width, height, clampWidth, clampHeight)) + { + TextureCache_ActivateTexture( t, current ); + cache.hits++; + return; + } + + current = current->lower; + } + + cache.misses++; + + glActiveTexture( GL_TEXTURE0 + t); + + cache.current[t] = TextureCache_AddTop(); + + if (cache.current[t] == NULL) + { + LOG(LOG_ERROR, "Texture Cache Failure\n"); + } + + glBindTexture( GL_TEXTURE_2D, cache.current[t]->glName ); + + cache.current[t]->address = gDP.textureImage.address; + cache.current[t]->crc = crc; + + cache.current[t]->format = gSP.textureTile[t]->format; + 
cache.current[t]->size = gSP.textureTile[t]->size; + + cache.current[t]->width = width; + cache.current[t]->height = height; + cache.current[t]->clampWidth = clampWidth; + cache.current[t]->clampHeight = clampHeight; + cache.current[t]->palette = gSP.textureTile[t]->palette; + cache.current[t]->maskS = gSP.textureTile[t]->masks; + cache.current[t]->maskT = gSP.textureTile[t]->maskt; + cache.current[t]->mirrorS = gSP.textureTile[t]->mirrors; + cache.current[t]->mirrorT = gSP.textureTile[t]->mirrort; + cache.current[t]->clampS = gSP.textureTile[t]->clamps; + cache.current[t]->clampT = gSP.textureTile[t]->clampt; + cache.current[t]->line = gSP.textureTile[t]->line; + cache.current[t]->tMem = gSP.textureTile[t]->tmem; + cache.current[t]->lastDList = RSP.DList; + + + if (cache.current[t]->clampS) + cache.current[t]->realWidth = (config.texture.pow2) ? pow2(clampWidth) : clampWidth; + else if (cache.current[t]->mirrorS) + cache.current[t]->realWidth = maskWidth << 1; + else + cache.current[t]->realWidth = (config.texture.pow2) ? pow2(width) : width; + + if (cache.current[t]->clampT) + cache.current[t]->realHeight = (config.texture.pow2) ? pow2(clampHeight) : clampHeight; + else if (cache.current[t]->mirrorT) + cache.current[t]->realHeight = maskHeight << 1; + else + cache.current[t]->realHeight = (config.texture.pow2) ? pow2(height) : height; + + + cache.current[t]->scaleS = 1.0f / (f32)(cache.current[t]->realWidth); + cache.current[t]->scaleT = 1.0f / (f32)(cache.current[t]->realHeight); + + // Hack for Zelda Sun + if ((config.hackZelda) && (gDP.combine.mux == 0x00262a60150c937fLL)) + { + if ((cache.current[t]->format = G_IM_FMT_I) && (cache.current[t]->size == G_IM_SIZ_8b) && + (cache.current[t]->width == 64)) + { + cache.current[t]->scaleS *= 0.5f; + cache.current[t]->scaleT *= 0.5f; + } + } + + cache.current[t]->shiftScaleS = 1.0f; + cache.current[t]->shiftScaleT = 1.0f; + + cache.current[t]->offsetS = config.texture.sai2x ? 
0.25f : 0.5f; + cache.current[t]->offsetT = config.texture.sai2x ? 0.25f : 0.5f; + + if (gSP.textureTile[t]->shifts > 10) + cache.current[t]->shiftScaleS = (f32)(1 << (16 - gSP.textureTile[t]->shifts)); + else if (gSP.textureTile[t]->shifts > 0) + cache.current[t]->shiftScaleS /= (f32)(1 << gSP.textureTile[t]->shifts); + + if (gSP.textureTile[t]->shiftt > 10) + cache.current[t]->shiftScaleT = (f32)(1 << (16 - gSP.textureTile[t]->shiftt)); + else if (gSP.textureTile[t]->shiftt > 0) + cache.current[t]->shiftScaleT /= (f32)(1 << gSP.textureTile[t]->shiftt); + + TextureCache_Load( cache.current[t] ); + TextureCache_ActivateTexture( t, cache.current[t] ); + + cache.cachedBytes += cache.current[t]->textureBytes; +} + +void TextureCache_ActivateNoise(u32 t) +{ + glActiveTexture(GL_TEXTURE0 + t); + glBindTexture(GL_TEXTURE_2D, cache.glNoiseNames[RSP.DList & 0x1F]); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT ); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT ); +} + diff --git a/source/gles2n64/src/Textures.h b/source/gles2n64/src/Textures.h new file mode 100644 index 0000000..57ff50d --- /dev/null +++ b/source/gles2n64/src/Textures.h @@ -0,0 +1,91 @@ +#ifndef TEXTURES_H +#define TEXTURES_H + +#include + +#include "Hash.h" +#include "convert.h" + +struct CachedTexture +{ + GLuint glName; + u32 address; + u32 crc; + float offsetS, offsetT; + u32 maskS, maskT; + u32 clampS, clampT; + u32 mirrorS, mirrorT; + u32 line; + u32 size; + u32 format; + u32 tMem; + u32 palette; + u32 width, height; // N64 width and height + u32 clampWidth, clampHeight; // Size to clamp to + u32 realWidth, realHeight; // Actual texture size + f32 scaleS, scaleT; // Scale to map to 0.0-1.0 + f32 shiftScaleS, shiftScaleT; // Scale to shift + u32 textureBytes; + + CachedTexture *lower, *higher; + u32 lastDList; + +}; + +#define TEXTURECACHE_MAX (8 * 1024 * 1024) +#define TEXTUREBUFFER_SIZE (512 * 1024) + +struct TextureCache +{ + CachedTexture *current[2]; + CachedTexture 
/**
 * Round a texture dimension up to a power of two.
 *
 * Returns the smallest power of two that is >= dim; 0 and 1 both yield 1.
 * A dim above 2^31 has no power of two that fits in 32 bits, so the result
 * saturates at 2^31.  (Without the bound the counter is shifted out to 0,
 * 0 < dim stays true, and the loop never terminates.)
 */
static inline u32 pow2( u32 dim )
{
    u32 i = 1;

    while (i < dim && i < 0x80000000u)
        i <<= 1;

    return i;
}

/**
 * Number of bits needed to cover a dimension: the smallest n for which
 * (1 << n) >= dim.  0 and 1 both yield 0.
 *
 * The result is capped at 31 so that callers can always evaluate
 * (1 << powof(x)) on a 32-bit value, and so the loop terminates for
 * dim > 2^31 (where the running power would otherwise wrap to 0).
 */
static inline u32 powof( u32 dim )
{
    u32 num = 1;
    u32 i = 0;

    while (num < dim && i < 31)
    {
        num <<= 1;
        i++;
    }

    return i;
}
+#define TRUE 1 +#endif + +#ifndef FALSE +#define FALSE 0 +#endif + +#ifndef NULL +#define NULL 0 +#endif + +#endif // TYPES_H + diff --git a/source/gles2n64/src/VI.cpp b/source/gles2n64/src/VI.cpp new file mode 100644 index 0000000..37e280f --- /dev/null +++ b/source/gles2n64/src/VI.cpp @@ -0,0 +1,101 @@ +#include "Common.h" +#include "gles2N64.h" +#include "Types.h" +#include "VI.h" +#include "OpenGL.h" +#include "N64.h" +#include "gSP.h" +#include "gDP.h" +#include "RSP.h" +#include "Debug.h" +#include "Config.h" + +VIInfo VI; + +void VI_UpdateSize() +{ + + if (!config.video.force) + { + f32 xScale = _FIXED2FLOAT( _SHIFTR( *REG.VI_X_SCALE, 0, 12 ), 10 ); + f32 xOffset = _FIXED2FLOAT( _SHIFTR( *REG.VI_X_SCALE, 16, 12 ), 10 ); + + f32 yScale = _FIXED2FLOAT( _SHIFTR( *REG.VI_Y_SCALE, 0, 12 ), 10 ); + f32 yOffset = _FIXED2FLOAT( _SHIFTR( *REG.VI_Y_SCALE, 16, 12 ), 10 ); + + u32 hEnd = _SHIFTR( *REG.VI_H_START, 0, 10 ); + u32 hStart = _SHIFTR( *REG.VI_H_START, 16, 10 ); + + // These are in half-lines, so shift an extra bit + u32 vEnd = _SHIFTR( *REG.VI_V_START, 1, 9 ); + u32 vStart = _SHIFTR( *REG.VI_V_START, 17, 9 ); + + //Glide does this: + if (hEnd == hStart) hEnd = (u32)(*REG.VI_WIDTH / xScale); + + + VI.width = (hEnd - hStart) * xScale; + VI.height = (vEnd - vStart) * yScale * 1.0126582f; + } + else + { + VI.width = config.video.width; + VI.height = config.video.height; + } + + if (VI.width == 0.0f) VI.width = 320.0f; + if (VI.height == 0.0f) VI.height = 240.0f; + VI.rwidth = 1.0f / VI.width; + VI.rheight = 1.0f / VI.height; + + + //add display buffer if doesn't exist + if (config.ignoreOffscreenRendering) + { + int i; + //int start = *REG.VI_ORIGIN; + u32 start = RSP_SegmentToPhysical(*REG.VI_ORIGIN) & 0x00FFFFFF; + u32 end = min(start + VI.width * VI.height * 4, RDRAMSize); + for(i = 0; i < VI.displayNum; i++) + { + if (VI.display[i].start <= end && VI.display[i].start >= start) break; + if (start <= VI.display[i].end && start >= VI.display[i].start) break; + 
} + if (i == VI.displayNum) + { + //printf("VI IMAGE=%i\n", o); + VI.display[i%16].start = start; + VI.display[i%16].end = end; + VI.displayNum = (VI.displayNum < 16) ? (VI.displayNum+1) : 16; + } + } + +} + +void VI_UpdateScreen() +{ + + switch(config.updateMode) + { + + case SCREEN_UPDATE_AT_VI_CHANGE: + if (*REG.VI_ORIGIN != VI.lastOrigin) + { + if (*REG.VI_ORIGIN < VI.lastOrigin || *REG.VI_ORIGIN > VI.lastOrigin+0x2000 ) + OGL_SwapBuffers(); + + VI.lastOrigin = *REG.VI_ORIGIN; + } + break; + + case SCREEN_UPDATE_AT_VI_UPDATE: + if (gSP.changed & CHANGED_COLORBUFFER) + { + OGL_SwapBuffers(); + gSP.changed &= ~CHANGED_COLORBUFFER; + } + break; + } + +} + diff --git a/source/gles2n64/src/VI.h b/source/gles2n64/src/VI.h new file mode 100644 index 0000000..c138744 --- /dev/null +++ b/source/gles2n64/src/VI.h @@ -0,0 +1,27 @@ +#ifndef VI_H +#define VI_H +#include "Types.h" + +struct VIInfo +{ + u32 width, height; + f32 rwidth, rheight; + u32 lastOrigin; + + u32 realWidth, realHeight; + + struct{ + u32 start, end; + } display[16]; + + u32 displayNum; + +}; + +extern VIInfo VI; + +void VI_UpdateSize(); +void VI_UpdateScreen(); + +#endif + diff --git a/source/gles2n64/src/convert.h b/source/gles2n64/src/convert.h new file mode 100644 index 0000000..dbed5e7 --- /dev/null +++ b/source/gles2n64/src/convert.h @@ -0,0 +1,338 @@ +#ifndef CONVERT_H +#define CONVERT_H + +#include "Types.h" + +const volatile unsigned char Five2Eight[32] = +{ + 0, // 00000 = 00000000 + 8, // 00001 = 00001000 + 16, // 00010 = 00010000 + 25, // 00011 = 00011001 + 33, // 00100 = 00100001 + 41, // 00101 = 00101001 + 49, // 00110 = 00110001 + 58, // 00111 = 00111010 + 66, // 01000 = 01000010 + 74, // 01001 = 01001010 + 82, // 01010 = 01010010 + 90, // 01011 = 01011010 + 99, // 01100 = 01100011 + 107, // 01101 = 01101011 + 115, // 01110 = 01110011 + 123, // 01111 = 01111011 + 132, // 10000 = 10000100 + 140, // 10001 = 10001100 + 148, // 10010 = 10010100 + 156, // 10011 = 10011100 + 165, // 10100 = 
/**
 * Copy numBytes out of dword-byte-swapped memory into dest in natural order.
 *
 * For every k in [0, numBytes), dest[k] receives the byte stored at address
 * ((address of src) + k) ^ 3.  Unaligned leading and trailing bytes are
 * copied one at a time; the aligned middle is copied a dword at a time with
 * its bytes reversed.
 *
 * Addresses are manipulated as uintptr_t rather than long: long is narrower
 * than a pointer on some ABIs, which would truncate the address.
 */
static inline void UnswapCopy( void *src, void *dest, u32 numBytes )
{
    uintptr_t s = (uintptr_t)src;
    u8 *d = (u8 *)dest;

    // copy leading bytes (up to the next dword boundary of src)
    u32 leadingBytes = (u32)(s & 3);
    if (leadingBytes != 0)
    {
        leadingBytes = 4 - leadingBytes;
        if (leadingBytes > numBytes)
            leadingBytes = numBytes;
        numBytes -= leadingBytes;

        s ^= 3;
        for (u32 i = 0; i < leadingBytes; i++)
        {
            *d++ = *(const u8 *)s;
            s--;
        }
        // s walked backwards through the swapped dword; +5 lands on the
        // first byte of the following dword.
        s += 5;
    }

    // copy dwords
    u32 numDWords = numBytes >> 2;
    while (numDWords--)
    {
        u32 dword = *(const u32 *)s;
#ifdef ARM_ASM
        asm("rev %0, %0" : "+r"(dword)::);
#else
        dword = ((dword<<24)|((dword<<8)&0x00FF0000)|((dword>>8)&0x0000FF00)|(dword>>24));
#endif
        *(u32 *)d = dword;
        d += 4;
        s += 4;
    }

    // copy trailing bytes
    u32 trailingBytes = numBytes & 3;
    if (trailingBytes)
    {
        s ^= 3;
        for (u32 i = 0; i < trailingBytes; i++)
        {
            *d++ = *(const u8 *)s;
            s--;
        }
    }
}

/**
 * Swap the two dwords of each 8-byte group in place.
 * numDWords is the number of 8-byte groups processed (one swap each).
 */
static inline void DWordInterleave( void *mem, u32 numDWords )
{
    u32 *p = (u32 *)mem;

    while (numDWords--)
    {
        u32 tmp = p[0];
        p[0] = p[1];
        p[1] = tmp;
        p += 2;
    }
}

/**
 * Swap the two 8-byte halves of each 16-byte group in place.
 * numDWords is halved to obtain the number of 16-byte groups processed.
 */
static inline void QWordInterleave( void *mem, u32 numDWords )
{
    u32 *p = (u32 *)mem;
    u32 numQWords = numDWords >> 1;

    while (numQWords--)
    {
        u32 tmp0 = p[0];
        u32 tmp1 = p[1];
        p[0] = p[2];
        p[1] = p[3];
        p[2] = tmp0;
        p[3] = tmp1;
        p += 4;
    }
}

/** Reverse the byte order of a 32-bit value. */
static inline u32 swapdword( u32 value )
{
#ifdef ARM_ASM
    asm("rev %0, %0" : "+r"(value)::);
    return value;
#else
    return ((value & 0xff000000) >> 24) |
           ((value & 0x00ff0000) >>  8) |
           ((value & 0x0000ff00) <<  8) |
           ((value & 0x000000ff) << 24);
#endif
}

/** Exchange the two bytes of a 16-bit value. */
static inline u16 swapword( u16 value )
{
#ifdef ARM_ASM
    asm("rev16 %0, %0" : "+r"(value)::);
    return value;
#else
    return (value << 8) | (value >> 8);
#endif
}
/**
 * Expand a 4-bit IA31 texel (3 bits intensity, 1 bit alpha) to a 32-bit
 * value laid out like the other *_RGBA8888 converters in this header:
 * alpha in bits 24-31, intensity replicated into the three lower bytes.
 *
 * The intensity index is masked to 3 bits so an input with stray upper bits
 * cannot read past the end of Three2Eight.  The components are widened to
 * u32 before shifting; shifting a promoted u8 >= 128 left by 24 overflows
 * a signed int.
 */
static inline u32 IA31_RGBA8888( u8 color )
{
    u32 i = Three2Eight[(color >> 1) & 0x07];
    u32 a = One2Eight[color & 0x01];
    return (a << 24) | (i << 16) | (i << 8) | i;
}
/**
 * Expand a 4-bit intensity texel to a 32-bit value with the 8-bit
 * intensity replicated into all four bytes.
 *
 * Only the low nibble of color is used, matching I4_I8 and I4_IA88 in this
 * header; without the mask a value above 15 reads past the end of
 * Four2Eight.  The table entry is already a full 8-bit expansion, so no
 * further nibble duplication is needed.  The value is widened to u32 before
 * shifting so the << 24 cannot overflow a signed int.
 */
static inline u32 I4_RGBA8888( u8 color )
{
    u32 c = Four2Eight[color & 0x0f];
    return (c << 24) | (c << 16) | (c << 8) | c;
}
+ * + */ + +#include "eglport.h" + +#include +#include + +#define USE_EGL_SDL 1 +#define USE_GLES2 1 + +#if defined(USE_EGL_SDL) +#include "SDL.h" +#include "SDL_syswm.h" +SDL_SysWMinfo sysWmInfo; /** Holds our X Display/Window information */ +#endif /* USE_EGL_SDL */ + +#if defined(PANDORA) /* Pandora VSync Support */ +#include +#include +#include +#include + +#ifndef FBIO_WAITFORVSYNC +#define FBIO_WAITFORVSYNC _IOW('F', 0x20, __u32) +#endif +int fbdev = -1; + +#elif defined(RPI) +#include "bcm_host.h" +#endif /* PANDORA */ + +enum EGL_RENDER_T { + RENDER_RAW=0, /** Sets render mode to raw or framebuffer mode. */ + RENDER_SDL, /** Sets render mode to X11/SDL mode. */ + RENDER_TOTAL +}; + +enum EGL_SETTINGS_T { + CFG_MODE=0, /** Render mode for EGL 0=RAW 1=SDL. */ + CFG_VSYNC, /** Controls system vsync if available. */ + CFG_FSAA, /** Number of samples for full screen AA. 0 is off, 2/4 samples. */ + CFG_FPS, /** Calculate and report frame per second. */ + CFG_RED_SIZE, /** Number of bits of Red in the color buffer. */ + CFG_GREEN_SIZE, /** Number of bits of Green in the color buffer. */ + CFG_BLUE_SIZE, /** Number of bits of Blue in the color buffer. */ + CFG_ALPHA_SIZE, /** Number of bits of Alpha in the color buffer. */ + CFG_DEPTH_SIZE, /** Number of bits of Z in the depth buffer. */ + CFG_BUFFER_SIZE, /** The total color component bits in the color buffer. */ + CFG_STENCIL_SIZE, /** Number of bits of Stencil in the stencil buffer. */ + CFG_TOTAL /** Total number of settings. */ +}; + +NativeDisplayType nativeDisplay = 0; /** Reference to the systems native display */ +NativeWindowType nativeWindow = 0; /** Reference to the systems native window */ +EGLint eglSettings[CFG_TOTAL]; /** Stores setting values. 
*/ +EGLDisplay eglDisplay = NULL; /** Reference to the EGL display */ +EGLConfig eglConfig = NULL; /** Reference to the EGL config */ +EGLContext eglContext = NULL; /** Reference to the EGL context */ +EGLSurface eglSurface = NULL; /** Reference to the EGL surface */ + +#define totalConfigsIn 5 /** Total number of configurations to request */ +EGLint totalConfigsFound = 0; /** Total number of configurations matching attributes */ +EGLConfig eglConfigs[totalConfigsIn]; /** Structure containing references to matching configurations */ + +uint32_t fpsCount = 0; /** Total number of frames counted */ +uint32_t fpsTime = 0; /** Start time of frame count measurment */ + +int8_t eglColorbits = 0; +int8_t eglDepthbits = 0; +int8_t eglStencilbits = 0; + + +/** Private API */ +void OpenCfg ( const char* file ); +int8_t ConfigureEGL ( EGLConfig config ); +int8_t FindEGLConfigs ( void ); +int8_t CheckEGLErrors ( const char* file, uint16_t line ); + +int8_t GetNativeDisplay ( void ); +int8_t GetNativeWindow ( uint16_t width, uint16_t height ); +void FreeNativeDisplay ( void ); +void FreeNativeWindow ( void ); + +void Platform_Open ( void ); +void Platform_Close ( void ); +void Platform_VSync ( void ); +uint32_t Platform_GetTicks ( void ); + +/** @brief Release all EGL and system resources + */ +void EGL_Close( void ) +{ + /* Release EGL resources */ + if (eglDisplay != NULL) + { + peglMakeCurrent( eglDisplay, NULL, NULL, EGL_NO_CONTEXT ); + if (eglContext != NULL) { + peglDestroyContext( eglDisplay, eglContext ); + } + if (eglSurface != NULL) { + peglDestroySurface( eglDisplay, eglSurface ); + } + peglTerminate( eglDisplay ); + } + + eglSurface = NULL; + eglContext = NULL; + eglDisplay = NULL; + + eglColorbits = 0; + eglDepthbits = 0; + eglStencilbits = 0; + + /* Release platform resources */ + FreeNativeWindow(); + FreeNativeDisplay(); + Platform_Close(); + + CheckEGLErrors( __FILE__, __LINE__ ); + + printf( "EGLport: Closed\n" ); +} + +/** @brief Swap the surface buffer onto 
the display + */ +void EGL_SwapBuffers( void ) +{ + if (eglSettings[CFG_VSYNC] != 0) { + Platform_VSync(); + } + + peglSwapBuffers( eglDisplay, eglSurface ); + + if (eglSettings[CFG_FPS] != 0) { + fpsCount++; + + if (fpsTime - Platform_GetTicks() >= 1000) + { + printf( "EGLport: %d fps\n", fpsCount ); + fpsTime = Platform_GetTicks(); + fpsCount = 0; + } + } +} + +/** @brief Obtain the system display and initialize EGL + * @param width : desired pixel width of the window (not used by all platforms) + * @param height : desired pixel height of the window (not used by all platforms) + * @return : 0 if the function passed, else 1 + */ +int8_t EGL_Open( uint16_t width, uint16_t height ) +{ + EGLint eglMajorVer, eglMinorVer; + EGLBoolean result; + uint32_t configIndex = 0; + const char* output; + + static const EGLint contextAttribs[] = + { +#if defined(USE_GLES2) + EGL_CONTEXT_CLIENT_VERSION, 2, +#endif + EGL_NONE + }; + +#if defined(DEBUG) + printf( "EGLport Warning: DEBUG is enabled which may effect performance\n" ); +#endif + + /* Check that system is not open */ + if (eglDisplay != NULL || eglContext != NULL || eglSurface != NULL) + { + printf( "EGLport ERROR: EGL system is already open!\n" ); + return 1; + } + + /* Check for the cfg file to alternative settings */ + OpenCfg( "eglport.cfg" ); + + /* Setup any platform specific bits */ + Platform_Open(); + + printf( "EGLport: Opening EGL display\n" ); + if (GetNativeDisplay() != 0) + { + printf( "EGLport ERROR: Unable to obtain native display!\n" ); + return 1; + } + + eglDisplay = peglGetDisplay( nativeDisplay ); + if (eglDisplay == EGL_NO_DISPLAY) + { + CheckEGLErrors( __FILE__, __LINE__ ); + printf( "EGLport ERROR: Unable to create EGL display.\n" ); + return 1; + } + + printf( "EGLport: Initializing\n" ); + result = peglInitialize( eglDisplay, &eglMajorVer, &eglMinorVer ); + if (result != EGL_TRUE ) + { + CheckEGLErrors( __FILE__, __LINE__ ); + printf( "EGLport ERROR: Unable to initialize EGL display.\n" ); + 
return 1; + } + + /* Get EGL Library Information */ + printf( "EGL Implementation Version: Major %d Minor %d\n", eglMajorVer, eglMinorVer ); + output = peglQueryString( eglDisplay, EGL_VENDOR ); + printf( "EGL_VENDOR: %s\n", output ); + output = peglQueryString( eglDisplay, EGL_VERSION ); + printf( "EGL_VERSION: %s\n", output ); + output = peglQueryString( eglDisplay, EGL_EXTENSIONS ); + printf( "EGL_EXTENSIONS: %s\n", output ); + + if (FindEGLConfigs() != 0) + { + printf( "EGLport ERROR: Unable to configure EGL. See previous error.\n" ); + return 1; + } + + printf( "EGLport: Using Config %d\n", configIndex ); +#if defined(EGL_VERSION_1_2) + /* Bind GLES and create the context */ + printf( "EGLport: Binding API\n" ); + result = peglBindAPI( EGL_OPENGL_ES_API ); + if ( result == EGL_FALSE ) + { + CheckEGLErrors( __FILE__, __LINE__ ); + printf( "EGLport ERROR: Could not bind EGL API.\n" ); + return 1; + } +#endif /* EGL_VERSION_1_2 */ + + printf( "EGLport: Creating Context\n" ); + eglContext = peglCreateContext( eglDisplay, eglConfigs[configIndex], NULL, contextAttribs ); + if (eglContext == EGL_NO_CONTEXT) + { + CheckEGLErrors( __FILE__, __LINE__ ); + printf( "EGLport ERROR: Unable to create GLES context!\n"); + return 1; + } + + printf( "EGLport: Creating window surface\n" ); + if (GetNativeWindow( width, height ) != 0) + { + printf( "EGLport ERROR: Unable to obtain native window!\n" ); + return 1; + } + + eglSurface = peglCreateWindowSurface( eglDisplay, eglConfigs[configIndex], nativeWindow, 0 ); + if (eglSurface == EGL_NO_SURFACE) + { + CheckEGLErrors( __FILE__, __LINE__ ); + printf( "EGLport ERROR: Unable to create EGL surface!\n" ); + return 1; + } + + printf( "EGLport: Making Current\n" ); + result = peglMakeCurrent( eglDisplay, eglSurface, eglSurface, eglContext ); + if (result != EGL_TRUE) + { + CheckEGLErrors( __FILE__, __LINE__ ); + printf( "EGLport ERROR: Unable to make GLES context current\n" ); + return 1; + } + + { + EGLint color, depth, stencil; + 
eglGetConfigAttrib(eglDisplay, eglConfigs[configIndex], EGL_BUFFER_SIZE, &color); + eglGetConfigAttrib(eglDisplay, eglConfigs[configIndex], EGL_DEPTH_SIZE, &depth); + eglGetConfigAttrib(eglDisplay, eglConfigs[configIndex], EGL_STENCIL_SIZE, &stencil); + eglColorbits = (color==16)?5:8; //quick hack + eglDepthbits = depth; + eglStencilbits = stencil; + } + + printf( "EGLport: Setting swap interval\n" ); + peglSwapInterval( eglDisplay, (eglSettings[CFG_VSYNC] > 0) ? 1 : 0 ); + + printf( "EGLport: Complete\n" ); + + CheckEGLErrors( __FILE__, __LINE__ ); + + return 0; +} + +/** @brief Read settings that configure how to use EGL + * @param file : name of the config file + */ +void OpenCfg ( const char* file ) +{ + #define MAX_STRING 20 + #define MAX_SIZE 100 + uint8_t i; + FILE* fp = NULL; + char* location = NULL; + char eglStrings[CFG_TOTAL][MAX_STRING]; + char buffer[MAX_SIZE]; + + strncpy( eglStrings[CFG_MODE], "egl_mode=", MAX_STRING ); + strncpy( eglStrings[CFG_VSYNC], "use_vsync=", MAX_STRING ); + strncpy( eglStrings[CFG_FSAA], "use_fsaa=", MAX_STRING ); + strncpy( eglStrings[CFG_RED_SIZE], "size_red=", MAX_STRING ); + strncpy( eglStrings[CFG_GREEN_SIZE], "size_green=", MAX_STRING ); + strncpy( eglStrings[CFG_BLUE_SIZE], "size_blue=", MAX_STRING ); + strncpy( eglStrings[CFG_ALPHA_SIZE], "size_alpha=", MAX_STRING ); + strncpy( eglStrings[CFG_DEPTH_SIZE], "size_depth=", MAX_STRING ); + strncpy( eglStrings[CFG_BUFFER_SIZE], "size_buffer=", MAX_STRING ); + strncpy( eglStrings[CFG_STENCIL_SIZE], "size_stencil=", MAX_STRING ); + + /* Set defaults */ +#if defined(USE_EGL_SDL) + eglSettings[CFG_MODE] = RENDER_SDL; +#else + eglSettings[CFG_MODE] = RENDER_RAW; +#endif + eglSettings[CFG_VSYNC] = 0; + eglSettings[CFG_FSAA] = 0; + eglSettings[CFG_FPS] = 0; + eglSettings[CFG_RED_SIZE] = 5; + eglSettings[CFG_GREEN_SIZE] = 6; + eglSettings[CFG_BLUE_SIZE] = 5; + eglSettings[CFG_ALPHA_SIZE] = 0; + eglSettings[CFG_DEPTH_SIZE] = 16; + eglSettings[CFG_BUFFER_SIZE] = 16; + 
eglSettings[CFG_STENCIL_SIZE] = 0; + + /* Parse INI file */ + fp = fopen( file, "r"); + if (fp != NULL) + { + while (fgets( buffer, MAX_SIZE, fp ) != NULL) + { + for (i=0; i 0) ? 1 : 0; /* 20 */ + ConfigAttribs[attrib++] = EGL_SAMPLES; /* 21 */ + ConfigAttribs[attrib++] = eglSettings[CFG_FSAA]; /* 22 */ + ConfigAttribs[attrib++] = EGL_NONE; /* 23 */ + + result = peglChooseConfig( eglDisplay, ConfigAttribs, eglConfigs, totalConfigsIn, &totalConfigsFound ); + if (result != EGL_TRUE || totalConfigsFound == 0) + { + CheckEGLErrors( __FILE__, __LINE__ ); + printf( "EGLport ERROR: Unable to query for available configs, found %d.\n", totalConfigsFound ); + return 1; + } + printf( "EGLport: Found %d available configs\n", totalConfigsFound ); + + return 0; +} + +/** @brief Error checking function + * @param file : string reference that contains the source file that the check is occuring in + * @param line : numeric reference that contains the line number that the check is occuring in + * @return : 0 if the function passed, else 1 + */ +int8_t CheckEGLErrors( const char* file, uint16_t line ) +{ + EGLenum error; + const char* errortext; + const char* description; + + error = eglGetError(); + + if (error != EGL_SUCCESS && error != 0) + { + switch (error) + { + case EGL_NOT_INITIALIZED: + errortext = "EGL_NOT_INITIALIZED."; + description = "EGL is not or could not be initialized, for the specified display."; + break; + case EGL_BAD_ACCESS: + errortext = "EGL_BAD_ACCESS EGL"; + description = "cannot access a requested resource (for example, a context is bound in another thread)."; + break; + case EGL_BAD_ALLOC: + errortext = "EGL_BAD_ALLOC EGL"; + description = "failed to allocate resources for the requested operation."; + break; + case EGL_BAD_ATTRIBUTE: + errortext = "EGL_BAD_ATTRIBUTE"; + description = "An unrecognized attribute or attribute value was passed in anattribute list."; + break; + case EGL_BAD_CONFIG: + errortext = "EGL_BAD_CONFIG"; + description = "An EGLConfig 
argument does not name a valid EGLConfig."; + break; + case EGL_BAD_CONTEXT: + errortext = "EGL_BAD_CONTEXT"; + description = "An EGLContext argument does not name a valid EGLContext."; + break; + case EGL_BAD_CURRENT_SURFACE: + errortext = "EGL_BAD_CURRENT_SURFACE"; + description = "The current surface of the calling thread is a window, pbuffer,or pixmap that is no longer valid."; + break; + case EGL_BAD_DISPLAY: + errortext = "EGL_BAD_DISPLAY"; + description = "An EGLDisplay argument does not name a valid EGLDisplay."; + break; + case EGL_BAD_MATCH: + errortext = "EGL_BAD_MATCH"; + description = "Arguments are inconsistent; for example, an otherwise valid context requires buffers (e.g. depth or stencil) not allocated by an otherwise valid surface."; + break; + case EGL_BAD_NATIVE_PIXMAP: + errortext = "EGL_BAD_NATIVE_PIXMAP"; + description = "An EGLNativePixmapType argument does not refer to a validnative pixmap."; + break; + case EGL_BAD_NATIVE_WINDOW: + errortext = "EGL_BAD_NATIVE_WINDOW"; + description = "An EGLNativeWindowType argument does not refer to a validnative window."; + break; + case EGL_BAD_PARAMETER: + errortext = "EGL_BAD_PARAMETER"; + description = "One or more argument values are invalid."; + break; + case EGL_BAD_SURFACE: + errortext = "EGL_BAD_SURFACE"; + description = "An EGLSurface argument does not name a valid surface (window,pbuffer, or pixmap) configured for rendering"; + break; + case EGL_CONTEXT_LOST: + errortext = "EGL_CONTEXT_LOST"; + description = "A power management event has occurred. 
The application mustdestroy all contexts and reinitialise client API state and objects to continue rendering."; + break; + default: + errortext = "Unknown EGL Error"; + description = ""; + break; + } + + printf( "EGLport ERROR: EGL Error detected in file %s at line %d: %s (0x%X)\n Description: %s\n", file, line, errortext, error, description ); + return 1; + } + + return 0; +} + +/** @brief Obtain a reference to the system's native display + * @param window : pointer to save the display reference + * @return : 0 if the function passed, else 1 + */ +int8_t GetNativeDisplay( void ) +{ + if (eglSettings[CFG_MODE] == RENDER_RAW) /* RAW FB mode */ + { + printf( "EGLport: Using EGL_DEFAULT_DISPLAY\n" ); + nativeDisplay = EGL_DEFAULT_DISPLAY; + } + else if (eglSettings[CFG_MODE] == RENDER_SDL) /* SDL/X11 mode */ + { +#if defined(USE_EGL_SDL) + printf( "EGLport: Opening SDL/X11 display\n" ); + SDL_VERSION(&sysWmInfo.version); + SDL_GetWMInfo(&sysWmInfo); + nativeDisplay = (EGLNativeDisplayType)sysWmInfo.info.x11.display; + + if (nativeDisplay == 0) + { + printf( "EGLport ERROR: unable to get display!\n" ); + return 1; + } +#else + printf( "EGLport ERROR: SDL mode was not enabled in this compile!\n" ); +#endif + } + + return 0; +} + +/** @brief Obtain a reference to the system's native window + * @param width : desired pixel width of the window (not used by all platforms) + * @param height : desired pixel height of the window (not used by all platforms) + * @return : 0 if the function passed, else 1 + */ +int8_t GetNativeWindow( uint16_t width, uint16_t height ) +{ + nativeWindow = 0; + +#if defined(WIZ) || defined(CAANOO) + + nativeWindow = (NativeWindowType)malloc(16*1024); + + if(nativeWindow == NULL) { + printf( "EGLport ERROR: Memory for window Failed\n" ); + return 1; + } + +#elif defined(RPI) + + EGLBoolean result; + uint32_t screen_width, screen_height; + static EGL_DISPMANX_WINDOW_T nativewindow; + DISPMANX_ELEMENT_HANDLE_T dispman_element; + 
DISPMANX_DISPLAY_HANDLE_T dispman_display; + DISPMANX_UPDATE_HANDLE_T dispman_update; + VC_RECT_T dst_rect; + VC_RECT_T src_rect; + + /* create an EGL window surface */ + result = graphics_get_display_size(0 /* LCD */, &screen_width, &screen_height); + if(result < 0) { + printf( "EGLport ERROR: RPi graphicget_display_size failed\n" ); + return 1; + } + + dst_rect.x = 0; + dst_rect.y = 0; + dst_rect.width = screen_width; + dst_rect.height = screen_height; + + src_rect.x = 0; + src_rect.y = 0; + src_rect.width = width << 16; + src_rect.height = height << 16; + + dispman_display = vc_dispmanx_display_open( 0 /* LCD */); + dispman_update = vc_dispmanx_update_start( 0 ); + dispman_element = vc_dispmanx_element_add ( dispman_update, dispman_display, + 0 /*layer*/, &dst_rect, 0 /*src*/, + &src_rect, DISPMANX_PROTECTION_NONE, (VC_DISPMANX_ALPHA_T*)0 /*alpha*/, (DISPMANX_CLAMP_T*)0 /*clamp*/, (DISPMANX_TRANSFORM_T)0 /*transform*/); + + nativewindow.element = dispman_element; + nativewindow.width = screen_width; + nativewindow.height = screen_height; + vc_dispmanx_update_submit_sync( dispman_update ); + + nativeWindow = (NativeWindowType)&nativewindow; + +#else /* default */ + + if (eglSettings[CFG_MODE] == RENDER_RAW) /* RAW FB mode */ + { + nativeWindow = 0; + } + else if(eglSettings[CFG_MODE] == RENDER_SDL) /* SDL/X11 mode */ + { +#if defined(USE_EGL_SDL) + /* SDL_GetWMInfo is populated when display was opened */ + nativeWindow = (NativeWindowType)sysWmInfo.info.x11.window; + + if (nativeWindow == 0) + { + printf( "EGLport ERROR: unable to get window!\n" ); + return 1; + } +#else + printf( "EGLport ERROR: SDL mode was not enabled in this compile!\n" ); +#endif + } + else + { + printf( "EGLport ERROR: Unknown EGL render mode %d!\n", eglSettings[CFG_MODE] ); + return 1; + } + +#endif /* WIZ / CAANOO */ + + return 0; +} + +/** @brief Release the system's native display + */ +void FreeNativeDisplay( void ) +{ +} + +/** @brief Release the system's native window + */ +void 
FreeNativeWindow( void ) +{ +#if defined(WIZ) || defined(CAANOO) + if (nativeWindow != NULL) { + free( nativeWindow ); + } + nativeWindow = NULL; +#endif /* WIZ / CAANOO */ +} + +/** @brief Open any system specific resources + */ +void Platform_Open( void ) +{ +#if defined(PANDORA) + /* Pandora VSync */ + fbdev = open( "/dev/fb0", O_RDONLY /* O_RDWR */ ); + if ( fbdev < 0 ) { + printf( "EGLport ERROR: Couldn't open /dev/fb0 for Pandora Vsync\n" ); + } +#elif defined(RPI) + bcm_host_init(); +#endif /* PANDORA */ +} + +/** @brief Release any system specific resources + */ +void Platform_Close( void ) +{ +#if defined(PANDORA) + /* Pandora VSync */ + close( fbdev ); + fbdev = -1; +#endif /* PANDORA */ +} + +/** @brief Check the systems vsync state + */ +void Platform_VSync( void ) +{ +#if defined(PANDORA) + /* Pandora VSync */ + if (fbdev >= 0) { + int arg = 0; + ioctl( fbdev, FBIO_WAITFORVSYNC, &arg ); + } +#endif /* PANDORA */ +} + +/** @brief Get the system tick time (ms) + */ +uint32_t Platform_GetTicks( void ) +{ + uint32_t ticks = 0; +#if defined(USE_EGL_SDL) + ticks = SDL_GetTicks(); +#else + printf( "EGLport ERROR: SDL mode was not enabled in this compile!\n" ); +#endif + return ticks; +} diff --git a/source/gles2n64/src/eglport.h b/source/gles2n64/src/eglport.h new file mode 100755 index 0000000..736456c --- /dev/null +++ b/source/gles2n64/src/eglport.h @@ -0,0 +1,108 @@ +/** + * + * EGLPORT.H + * Copyright (C) 2011-2013 Scott R. 
Smith + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + */ + +#ifndef EGLPORT_H +#define EGLPORT_H + +#include +#include "EGL/egl.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** Defines (in every case choose only one) */ +/** Common: */ +/** DEBUG : enable additional error monitoring per EGL function call */ +/** Native display and window system for use with EGL */ +/** USE_EGL_SDL : used for access to a SDL X11 window */ +/** Platform: settings that are specific to that device */ +/** PANDORA (USE_GLES1 or USE_GLES2) */ +/** WIZ (USE_GLES1) */ +/** CAANOO (USE_GLES1) */ +/** RPI (USE_GLES1 or USE_GLES2) */ +/** GLES Version */ +/** USE_GLES1 : EGL for use with OpenGL-ES 1.X contexts */ +/** USE_GLES2 : EGL for use with OpenGL-ES 2.0 contexts */ + +/** Public API */ +void EGL_Close ( void ); +int8_t EGL_Open ( uint16_t width, uint16_t height ); +void EGL_SwapBuffers ( void ); + +extern int8_t eglColorbits; +extern int8_t eglDepthbits; +extern int8_t eglStencilbits; + +/** Simple Examples */ +/** Raw mode: + EGL_Open( window_width, window_height ); + do while(!quit) { + ... run app + EGL_SwapBuffers(); + } + EGL_Close(); +*/ +/** X11/SDL mode: + SDL_Init( SDL_INIT_VIDEO ); + SDL_Surface* screen = SDL_SetVideoMode(640, 480, 16, SDL_SWSURFACE|SDL_FULLSCREEN); + EGL_Open( window_width, window_height ); + do while(!quit) { + ... 
run app + EGL_SwapBuffers(); + } + EGL_Close(); + SDL_Quit(); +*/ + +#if defined(DEBUG) +#define GET_EGLERROR(FUNCTION) \ + FUNCTION; \ + { \ + CheckEGLErrors(__FILE__, __LINE__); \ + } +#else +#define GET_EGLERROR(FUNCTION) FUNCTION; +#endif + +#define peglQueryString(A,B) GET_EGLERROR(eglQueryString(A,B)) +#define peglDestroyContext(A,B) GET_EGLERROR(eglDestroyContext(A,B)) +#define peglDestroySurface(A,B) GET_EGLERROR(eglDestroySurface(A,B)) +#define peglTerminate(A) GET_EGLERROR(eglTerminate(A)) +#define peglSwapBuffers(A,B) GET_EGLERROR(eglSwapBuffers(A,B)) +#define peglGetDisplay(A) GET_EGLERROR(eglGetDisplay(A)) +#define peglBindAPI(A) GET_EGLERROR(eglBindAPI(A)) +#define peglCreateContext(A,B,C,D) GET_EGLERROR(eglCreateContext(A,B,C,D)) +#define peglCreateWindowSurface(A,B,C,D) GET_EGLERROR(eglCreateWindowSurface(A,B,C,D)) +#define peglInitialize(A,B,C) GET_EGLERROR(eglInitialize(A,B,C)) +#define peglMakeCurrent(A,B,C,D) GET_EGLERROR(eglMakeCurrent(A,B,C,D)) +#define peglChooseConfig(A,B,C,D,E) GET_EGLERROR(eglChooseConfig(A,B,C,D,E)) +#define peglSwapInterval(A,B) GET_EGLERROR(eglSwapInterval(A,B)) + +#ifdef __cplusplus +} +#endif + +#endif /* EGLPORT_H */ diff --git a/source/gles2n64/src/gDP.cpp b/source/gles2n64/src/gDP.cpp new file mode 100644 index 0000000..afe65ff --- /dev/null +++ b/source/gles2n64/src/gDP.cpp @@ -0,0 +1,970 @@ +#include + +#include "gles2N64.h" +#include "N64.h" +#include "GBI.h" +#include "RSP.h" +#include "gDP.h" +#include "gSP.h" +#include "Types.h" +#include "Debug.h" +#include "convert.h" +#include "OpenGL.h" +#include "CRC.h" +#include "DepthBuffer.h" +#include "VI.h" +#include "Config.h" + + +//thank rice_video for this: +bool _IsRenderTexture() +{ + bool foundSetScissor=false; + bool foundFillRect=false; + bool foundSetFillColor=false; + bool foundSetCImg=false; + bool foundTxtRect=false; + int height; + unsigned int newFillColor = 0; + unsigned int dwPC = RSP.PC[RSP.PCi]; // This points to the next instruction + + for(int 
i=0; i<10; i++ ) + { + unsigned int w0 = *(unsigned int *)(RDRAM + dwPC + i*8); + unsigned int w1 = *(unsigned int *)(RDRAM + dwPC + 4 + i*8); + + if ((w0>>24) == G_SETSCISSOR) + { + height = ((w1>>0 )&0xFFF)/4; + foundSetScissor = true; + continue; + } + + if ((w0>>24) == G_SETFILLCOLOR) + { + height = ((w1>>0 )&0xFFF)/4; + foundSetFillColor = true; + newFillColor = w1; + continue; + } + + if ((w0>>24) == G_FILLRECT) + { + unsigned int x0 = ((w1>>12)&0xFFF)/4; + unsigned int y0 = ((w1>>0 )&0xFFF)/4; + unsigned int x1 = ((w0>>12)&0xFFF)/4; + unsigned int y1 = ((w0>>0 )&0xFFF)/4; + + if (x0 == 0 && y0 == 0) + { + if( x1 == gDP.colorImage.width) + { + height = y1; + foundFillRect = true; + continue; + } + + if(x1 == (unsigned int)(gDP.colorImage.width-1)) + { + height = y1+1; + foundFillRect = true; + continue; + } + } + } + + if ((w0>>24) == G_TEXRECT) + { + foundTxtRect = true; + break; + } + + if ((w0>>24) == G_SETCIMG) + { + foundSetCImg = true; + break; + } + } + + if (foundFillRect ) + { + if (foundSetFillColor) + { + if (newFillColor != 0xFFFCFFFC) + return true; // this is a render_texture + else + return false; + } + + if (gDP.fillColor.i == 0x00FFFFF7) + return true; // this is a render_texture + else + return false; // this is a normal ZImg + } + else if (foundSetFillColor && newFillColor == 0xFFFCFFFC && foundSetCImg ) + { + return false; + } + else + return true; + + + if (!foundSetCImg) return true; + + if (foundSetScissor ) return true; + + return false; +} + +gDPInfo gDP; + +void gDPSetOtherMode( u32 mode0, u32 mode1 ) +{ + gDP.otherMode.h = mode0; + gDP.otherMode.l = mode1; + gDP.changed |= CHANGED_RENDERMODE | CHANGED_CYCLETYPE | CHANGED_ALPHACOMPARE; + +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gDPSetOtherMode( %s | %s | %s | %s | %s | %s | %s | %s | %s | %s | %s, %s | %s | %s%s%s%s%s | %s | %s%s%s );\n", + AlphaDitherText[gDP.otherMode.alphaDither], + ColorDitherText[gDP.otherMode.colorDither], + 
CombineKeyText[gDP.otherMode.combineKey], + TextureConvertText[gDP.otherMode.textureConvert], + TextureFilterText[gDP.otherMode.textureFilter], + TextureLUTText[gDP.otherMode.textureLUT], + TextureLODText[gDP.otherMode.textureLOD], + TextureDetailText[gDP.otherMode.textureDetail], + TexturePerspText[gDP.otherMode.texturePersp], + CycleTypeText[gDP.otherMode.cycleType], + PipelineModeText[gDP.otherMode.pipelineMode], + AlphaCompareText[gDP.otherMode.alphaCompare], + DepthSourceText[gDP.otherMode.depthSource], + gDP.otherMode.AAEnable ? "AA_EN | " : "", + gDP.otherMode.depthCompare ? "Z_CMP | " : "", + gDP.otherMode.depthUpdate ? "Z_UPD | " : "", + gDP.otherMode.imageRead ? "IM_RD | " : "", + CvgDestText[gDP.otherMode.cvgDest], + DepthModeText[gDP.otherMode.depthMode], + gDP.otherMode.cvgXAlpha ? "CVG_X_ALPHA | " : "", + gDP.otherMode.alphaCvgSel ? "ALPHA_CVG_SEL | " : "", + gDP.otherMode.forceBlender ? "FORCE_BL" : "" ); +#endif +} + +void gDPSetPrimDepth( u16 z, u16 dz ) +{ + z = z&0x7FFF; + + //gDP.primDepth.z = (_FIXED2FLOAT( z, 15 ) - gSP.viewport.vtrans[2]) / gSP.viewport.vscale[2] ; + gDP.primDepth.z = (z - gSP.viewport.vtrans[2]) / gSP.viewport.vscale[2] ; + gDP.primDepth.deltaZ = dz; + gDP.changed |= CHANGED_PRIMITIVEZ; + +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gDPSetPrimDepth( %f, %f );\n", + gDP.primDepth.z, + gDP.primDepth.deltaZ); +#endif +} + +void gDPPipelineMode( u32 mode ) +{ + gDP.otherMode.pipelineMode = mode; + +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gDPPipelineMode( %s );\n", + PipelineModeText[gDP.otherMode.pipelineMode] ); +#endif +} + +void gDPSetCycleType( u32 type ) +{ + gDP.otherMode.cycleType = type; + gDP.changed |= CHANGED_CYCLETYPE; + +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gDPSetCycleType( %s );\n", + CycleTypeText[gDP.otherMode.cycleType] ); +#endif +} + +void gDPSetTexturePersp( u32 enable ) +{ + gDP.otherMode.texturePersp = enable; + +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | 
DEBUG_TEXTURE, "gDPSetTexturePersp( %s );\n", + TexturePerspText[gDP.otherMode.texturePersp] ); +#endif +} + +void gDPSetTextureDetail( u32 type ) +{ + gDP.otherMode.textureDetail = type; + +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_TEXTURE, "gDPSetTextureDetail( %s );\n", + TextureDetailText[gDP.otherMode.textureDetail] ); +#endif +} + +void gDPSetTextureLOD( u32 mode ) +{ + gDP.otherMode.textureLOD = mode; + +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_TEXTURE, "gDPSetTextureLOD( %s );\n", + TextureLODText[gDP.otherMode.textureLOD] ); +#endif +} + +void gDPSetTextureLUT( u32 mode ) +{ + gDP.otherMode.textureLUT = mode; + +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_TEXTURE, "gDPSetTextureLUT( %s );\n", + TextureLUTText[gDP.otherMode.textureLUT] ); +#endif +} + +void gDPSetTextureFilter( u32 type ) +{ + gDP.otherMode.textureFilter = type; + +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_TEXTURE, "gDPSetTextureFilter( %s );\n", + TextureFilterText[gDP.otherMode.textureFilter] ); +#endif +} + +void gDPSetTextureConvert( u32 type ) +{ + gDP.otherMode.textureConvert = type; + +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_TEXTURE, "gDPSetTextureConvert( %s );\n", + TextureConvertText[gDP.otherMode.textureConvert] ); +#endif +} + +void gDPSetCombineKey( u32 type ) +{ + gDP.otherMode.combineKey = type; + +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_COMBINE, "gDPSetCombineKey( %s );\n", + CombineKeyText[gDP.otherMode.combineKey] ); +#endif +} + +void gDPSetColorDither( u32 type ) +{ + gDP.otherMode.colorDither = type; + +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gDPSetColorDither( %s );\n", + ColorDitherText[gDP.otherMode.colorDither] ); +#endif +} + +void gDPSetAlphaDither( u32 type ) +{ + gDP.otherMode.alphaDither = type; + +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gDPSetAlphaDither( %s );\n", + AlphaDitherText[gDP.otherMode.alphaDither] ); +#endif +} 
+ +void gDPSetAlphaCompare( u32 mode ) +{ + gDP.otherMode.alphaCompare = mode; + gDP.changed |= CHANGED_ALPHACOMPARE; + +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gDPSetAlphaCompare( %s );\n", + AlphaCompareText[gDP.otherMode.alphaCompare] ); +#endif +} + +void gDPSetDepthSource( u32 source ) +{ + gDP.otherMode.depthSource = source; + gDP.changed |= CHANGED_DEPTHSOURCE; + +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gDPSetDepthSource( %s );\n", + DepthSourceText[gDP.otherMode.depthSource] ); +#endif +} + +void gDPSetRenderMode( u32 mode1, u32 mode2 ) +{ + gDP.otherMode.l &= 0x00000007; + gDP.otherMode.l |= mode1 | mode2; + gDP.changed |= CHANGED_RENDERMODE; + +#ifdef DEBUG + // THIS IS INCOMPLETE!!! + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gDPSetRenderMode( %s%s%s%s%s | %s | %s%s%s );\n", + gDP.otherMode.AAEnable ? "AA_EN | " : "", + gDP.otherMode.depthCompare ? "Z_CMP | " : "", + gDP.otherMode.depthUpdate ? "Z_UPD | " : "", + gDP.otherMode.imageRead ? "IM_RD | " : "", + CvgDestText[gDP.otherMode.cvgDest], + DepthModeText[gDP.otherMode.depthMode], + gDP.otherMode.cvgXAlpha ? "CVG_X_ALPHA | " : "", + gDP.otherMode.alphaCvgSel ? "ALPHA_CVG_SEL | " : "", + gDP.otherMode.forceBlender ? 
"FORCE_BL" : "" ); +#endif +} + +void gDPSetCombine( s32 muxs0, s32 muxs1 ) +{ + gDP.combine.muxs0 = muxs0; + gDP.combine.muxs1 = muxs1; + gDP.changed |= CHANGED_COMBINE; + +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_COMBINE, "gDPSetCombine( %s, %s, %s, %s, %s, %s, %s, %s,\n", + saRGBText[gDP.combine.saRGB0], + sbRGBText[gDP.combine.sbRGB0], + mRGBText[gDP.combine.mRGB0], + aRGBText[gDP.combine.aRGB0], + saAText[gDP.combine.saA0], + sbAText[gDP.combine.sbA0], + mAText[gDP.combine.mA0], + aAText[gDP.combine.aA0] ); + + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_COMBINE, " %s, %s, %s, %s, %s, %s, %s, %s );\n", + saRGBText[gDP.combine.saRGB1], + sbRGBText[gDP.combine.sbRGB1], + mRGBText[gDP.combine.mRGB1], + aRGBText[gDP.combine.aRGB1], + saAText[gDP.combine.saA1], + sbAText[gDP.combine.sbA1], + mAText[gDP.combine.mA1], + aAText[gDP.combine.aA1] ); + +#endif +} + +void gDPSetColorImage( u32 format, u32 size, u32 width, u32 address ) +{ + if (config.updateMode == SCREEN_UPDATE_AT_CI_CHANGE) + OGL_SwapBuffers(); + + if (config.updateMode == SCREEN_UPDATE_AT_1ST_CI_CHANGE && OGL.screenUpdate) + OGL_SwapBuffers(); + + u32 addr = RSP_SegmentToPhysical( address ); + + if (gDP.colorImage.address != addr) + { + gDP.colorImage.changed = FALSE; + if (width == VI.width) + gDP.colorImage.height = VI.height; + else + gDP.colorImage.height = 1; + } + + gDP.colorImage.format = format; + gDP.colorImage.size = size; + gDP.colorImage.width = width; + gDP.colorImage.address = addr; + + if (config.ignoreOffscreenRendering) + { + int i; + + //colorimage byte size: + //color image height is not the best thing to base this on, its normally set + //later on in the code + + if (gDP.colorImage.address == gDP.depthImageAddress) + { + OGL.renderingToTexture = false; + } + else if (size == G_IM_SIZ_16b && format == G_IM_FMT_RGBA) + { + int s = 0; + switch(size) + { + case G_IM_SIZ_4b: s = (gDP.colorImage.width * gDP.colorImage.height) / 2; break; + case G_IM_SIZ_8b: s = 
(gDP.colorImage.width * gDP.colorImage.height); break; + case G_IM_SIZ_16b: s = (gDP.colorImage.width * gDP.colorImage.height) * 2; break; + case G_IM_SIZ_32b: s = (gDP.colorImage.width * gDP.colorImage.height) * 4; break; + } + u32 start = addr & 0x00FFFFFF; + u32 end = min(start + s, RDRAMSize); + for(i = 0; i < VI.displayNum; i++) + { + if (VI.display[i].start <= end && VI.display[i].start >= start) break; + if (start <= VI.display[i].end && start >= VI.display[i].start) break; + } + + OGL.renderingToTexture = (i == VI.displayNum); + } + else + { + OGL.renderingToTexture = true; + } + +#if 0 + if (OGL.renderingToTexture) + { + printf("start=%i end=%i\n", start, end); + printf("display="); + for(int i=0; i< VI.displayNum; i++) printf("%i,%i:", VI.display[i].start, VI.display[i].end); + printf("\n"); + } +#endif + } + else + { + OGL.renderingToTexture = false; + } + + +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gDPSetColorImage( %s, %s, %i, 0x%08X );\n", + ImageFormatText[gDP.colorImage.format], + ImageSizeText[gDP.colorImage.size], + gDP.colorImage.width, + gDP.colorImage.address ); +#endif +} + +void gDPSetTextureImage( u32 format, u32 size, u32 width, u32 address ) +{ + gDP.textureImage.format = format; + gDP.textureImage.size = size; + gDP.textureImage.width = width; + gDP.textureImage.address = RSP_SegmentToPhysical( address ); + gDP.textureImage.bpl = gDP.textureImage.width << gDP.textureImage.size >> 1; + +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_TEXTURE, "gDPSetTextureImage( %s, %s, %i, 0x%08X );\n", + ImageFormatText[gDP.textureImage.format], + ImageSizeText[gDP.textureImage.size], + gDP.textureImage.width, + gDP.textureImage.address ); +#endif +} + +void gDPSetDepthImage( u32 address ) +{ +// if (address != gDP.depthImageAddress) +// OGL_ClearDepthBuffer(); + + u32 addr = RSP_SegmentToPhysical(address); + DepthBuffer_SetBuffer(addr); + + if (depthBuffer.current->cleared) + OGL_ClearDepthBuffer(); + + gDP.depthImageAddress = 
addr; + +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gDPSetDepthImage( 0x%08X );\n", gDP.depthImageAddress ); +#endif +} + +void gDPSetEnvColor( u32 r, u32 g, u32 b, u32 a ) +{ + gDP.envColor.r = r * 0.0039215689f; + gDP.envColor.g = g * 0.0039215689f; + gDP.envColor.b = b * 0.0039215689f; + gDP.envColor.a = a * 0.0039215689f; + + gDP.changed |= CHANGED_ENV_COLOR; + +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_COMBINE, "gDPSetEnvColor( %u, %u, %u, %u );\n", + r, g, b, a ); +#endif +} + +void gDPSetBlendColor( u32 r, u32 g, u32 b, u32 a ) +{ + gDP.blendColor.r = r * 0.0039215689f; + gDP.blendColor.g = g * 0.0039215689f; + gDP.blendColor.b = b * 0.0039215689f; + gDP.blendColor.a = a * 0.0039215689f; + gDP.changed |= CHANGED_BLENDCOLOR; + +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gDPSetBlendColor( %u, %u, %u, %u );\n", + r, g, b, a ); +#endif +} + +void gDPSetFogColor( u32 r, u32 g, u32 b, u32 a ) +{ + gDP.fogColor.r = r * 0.0039215689f; + gDP.fogColor.g = g * 0.0039215689f; + gDP.fogColor.b = b * 0.0039215689f; + gDP.fogColor.a = a * 0.0039215689f; + + gDP.changed |= CHANGED_FOGCOLOR; + +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gDPSetFogColor( %u, %u, %u, %u );\n", + r, g, b, a ); +#endif +} + +void gDPSetFillColor( u32 c ) +{ + + gDP.fillColor.i = c; + gDP.fillColor.r = _SHIFTR( c, 11, 5 ) * 0.032258064f; + gDP.fillColor.g = _SHIFTR( c, 6, 5 ) * 0.032258064f; + gDP.fillColor.b = _SHIFTR( c, 1, 5 ) * 0.032258064f; + gDP.fillColor.a = _SHIFTR( c, 0, 1 ); + + gDP.fillColor.z = _SHIFTR( c, 2, 14 ); + gDP.fillColor.dz = _SHIFTR( c, 0, 2 ); + +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gDPSetFillColor( 0x%08X );\n", c ); +#endif +} + +void gDPSetPrimColor( u32 m, u32 l, u32 r, u32 g, u32 b, u32 a ) +{ + gDP.primColor.m = m; + gDP.primColor.l = l * 0.0039215689f; + gDP.primColor.r = r * 0.0039215689f; + gDP.primColor.g = g * 0.0039215689f; + gDP.primColor.b = b * 0.0039215689f; + gDP.primColor.a = a * 
0.0039215689f; + + gDP.changed |= CHANGED_PRIM_COLOR; + +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_COMBINE, "gDPSetPrimColor( %u, %u, %u, %u, %u, %u );\n", + m, l, r, g, b, a ); +#endif +} + +void gDPSetTile( u32 format, u32 size, u32 line, u32 tmem, u32 tile, u32 palette, u32 cmt, u32 cms, u32 maskt, u32 masks, u32 shiftt, u32 shifts ) +{ + if (((size == G_IM_SIZ_4b) || (size == G_IM_SIZ_8b)) && (format == G_IM_FMT_RGBA)) + format = G_IM_FMT_CI; + + gDP.tiles[tile].format = format; + gDP.tiles[tile].size = size; + gDP.tiles[tile].line = line; + gDP.tiles[tile].tmem = tmem; + gDP.tiles[tile].palette = palette; + gDP.tiles[tile].cmt = cmt; + gDP.tiles[tile].cms = cms; + gDP.tiles[tile].maskt = maskt; + gDP.tiles[tile].masks = masks; + gDP.tiles[tile].shiftt = shiftt; + gDP.tiles[tile].shifts = shifts; + + if (!gDP.tiles[tile].masks) gDP.tiles[tile].clamps = 1; + if (!gDP.tiles[tile].maskt) gDP.tiles[tile].clampt = 1; +} + +void gDPSetTileSize( u32 tile, u32 uls, u32 ult, u32 lrs, u32 lrt ) +{ + gDP.tiles[tile].uls = _SHIFTR( uls, 2, 10 ); + gDP.tiles[tile].ult = _SHIFTR( ult, 2, 10 ); + gDP.tiles[tile].lrs = _SHIFTR( lrs, 2, 10 ); + gDP.tiles[tile].lrt = _SHIFTR( lrt, 2, 10 ); + + gDP.tiles[tile].fuls = _FIXED2FLOAT( uls, 2 ); + gDP.tiles[tile].fult = _FIXED2FLOAT( ult, 2 ); + gDP.tiles[tile].flrs = _FIXED2FLOAT( lrs, 2 ); + gDP.tiles[tile].flrt = _FIXED2FLOAT( lrt, 2 ); + + gDP.changed |= CHANGED_TILE; + +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_TEXTURE, "gDPSetTileSize( %u, %.2f, %.2f, %.2f, %.2f );\n", + tile, + gDP.tiles[tile].fuls, + gDP.tiles[tile].fult, + gDP.tiles[tile].flrs, + gDP.tiles[tile].flrt ); +#endif +} + +void gDPLoadTile( u32 tile, u32 uls, u32 ult, u32 lrs, u32 lrt ) +{ + void (*Interleave)( void *mem, u32 numDWords ); + + u32 address, height, bpl, line, y; + u64 *dest; + u8 *src; + + gDPSetTileSize( tile, uls, ult, lrs, lrt ); + gDP.loadTile = &gDP.tiles[tile]; + + if (gDP.loadTile->line == 0) + return; + + 
address = gDP.textureImage.address + gDP.loadTile->ult * gDP.textureImage.bpl + (gDP.loadTile->uls << gDP.textureImage.size >> 1); + dest = &TMEM[gDP.loadTile->tmem]; + bpl = (gDP.loadTile->lrs - gDP.loadTile->uls + 1) << gDP.loadTile->size >> 1; + height = gDP.loadTile->lrt - gDP.loadTile->ult + 1; + src = &RDRAM[address]; + + if (((address + height * bpl) > RDRAMSize) || + (((gDP.loadTile->tmem << 3) + bpl * height) > 4096)) // Stay within TMEM + { +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_ERROR | DEBUG_TEXTURE, "// Attempting to load texture tile out of range\n" ); + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_TEXTURE, "gDPLoadTile( %u, %i, %i, %i, %i );\n", + tile, gDP.loadTile->uls, gDP.loadTile->ult, gDP.loadTile->lrs, gDP.loadTile->lrt ); +#endif + return; + } + + // Line given for 32-bit is half what it seems it should since they split the + // high and low words. I'm cheating by putting them together. + if (gDP.loadTile->size == G_IM_SIZ_32b) + { + line = gDP.loadTile->line << 1; + Interleave = QWordInterleave; + } + else + { + line = gDP.loadTile->line; + Interleave = DWordInterleave; + } + + for (y = 0; y < height; y++) + { + UnswapCopy( src, dest, bpl ); + if (y & 1) Interleave( dest, line ); + + src += gDP.textureImage.bpl; + dest += line; + } + + gDP.textureMode = TEXTUREMODE_NORMAL; + gDP.loadType = LOADTYPE_TILE; + gDP.changed |= CHANGED_TMEM; + +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_TEXTURE, "gDPLoadTile( %u, %i, %i, %i, %i );\n", + tile, gDP.loadTile->uls, gDP.loadTile->ult, gDP.loadTile->lrs, gDP.loadTile->lrt ); +#endif +} + +void gDPLoadBlock( u32 tile, u32 uls, u32 ult, u32 lrs, u32 dxt ) +{ + gDPSetTileSize( tile, uls, ult, lrs, dxt ); + gDP.loadTile = &gDP.tiles[tile]; + + u32 bytes = (lrs + 1) << gDP.loadTile->size >> 1; + u32 address = gDP.textureImage.address + ult * gDP.textureImage.bpl + (uls << gDP.textureImage.size >> 1); + + if ((bytes == 0) || + ((address + bytes) > RDRAMSize) || + (((gDP.loadTile->tmem << 
3) + bytes) > 4096)) + { +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_ERROR | DEBUG_TEXTURE, "// Attempting to load texture block out of range\n" ); + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_TEXTURE, "gDPLoadBlock( %u, %u, %u, %u, %u );\n", + tile, uls, ult, lrs, dxt ); +#endif +// bytes = min( bytes, min( RDRAMSize - gDP.textureImage.address, 4096 - (gDP.loadTile->tmem << 3) ) ); + return; + } + + u64* src = (u64*)&RDRAM[address]; + u64* dest = &TMEM[gDP.loadTile->tmem]; + + if (dxt > 0) + { + u32 line = (2047 + dxt) / dxt; + u32 bpl = line << 3; + u32 height = bytes / bpl; + + if (gDP.loadTile->size == G_IM_SIZ_32b) + { + for (u32 y = 0; y < height; y++) + { + UnswapCopy( src, dest, bpl ); + if (y & 1) QWordInterleave( dest, line ); + src += line; + dest += line; + } + } + else + { + for (u32 y = 0; y < height; y++) + { + UnswapCopy( src, dest, bpl ); + if (y & 1) DWordInterleave( dest, line ); + src += line; + dest += line; + } + + } + + } + else + UnswapCopy( src, dest, bytes ); + + gDP.textureMode = TEXTUREMODE_NORMAL; + gDP.loadType = LOADTYPE_BLOCK; + gDP.changed |= CHANGED_TMEM; + +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_TEXTURE, "gDPLoadBlock( %u, %u, %u, %u, %u );\n", + tile, uls, ult, lrs, dxt ); +#endif +} +/* Copies a palette from the current texture image in RDRAM into TMEM (one colour per 64-bit entry) and refreshes the palette CRCs; out-of-range loads are rejected or clamped. */ +void gDPLoadTLUT( u32 tile, u32 uls, u32 ult, u32 lrs, u32 lrt ) +{ + gDPSetTileSize( tile, uls, ult, lrs, lrt ); + if ((gDP.tiles[tile].tmem < 256) || (gDP.tiles[tile].tmem > 511)) return; /* pal below is derived from tmem - 256 and would underflow for a lower address */ + u16 count = (gDP.tiles[tile].lrs - gDP.tiles[tile].uls + 1) * (gDP.tiles[tile].lrt - gDP.tiles[tile].ult + 1); + u32 address = gDP.textureImage.address + gDP.tiles[tile].ult * gDP.textureImage.bpl + (gDP.tiles[tile].uls << gDP.textureImage.size >> 1); + if (count > (512 - gDP.tiles[tile].tmem)) count = (u16)(512 - gDP.tiles[tile].tmem); /* TMEM is 4096 bytes (512 entries); stop at its end */ + if ((address + (((count + 1u) & ~1u) << 1)) > RDRAMSize) return; /* the src[i^1] reads below must stay inside RDRAM */ + u16 *dest = (u16*)&TMEM[gDP.tiles[tile].tmem]; + u16 *src = (u16*)&RDRAM[address]; + + u16 pal = (gDP.tiles[tile].tmem - 256) >> 4; + int i = 0; + while (i < count) + { + for (u16 j = 0; (j < 16) && (i < count); j++, i++) + { + u16 color = swapword( src[i^1] ); + + *dest = color; + //dest[1] = color; + //dest[2] = color; + //dest[3] = color; + + dest += 
4; + } + + gDP.paletteCRC16[pal] = CRC_CalculatePalette( 0xFFFFFFFF, &TMEM[256 + (pal << 4)], 16 ); + pal++; + } + + gDP.paletteCRC256 = CRC_Calculate( 0xFFFFFFFF, gDP.paletteCRC16, 64 ); + + gDP.changed |= CHANGED_TMEM; + +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_TEXTURE, "gDPLoadTLUT( %u, %i, %i, %i, %i );\n", + tile, gDP.tiles[tile].uls, gDP.tiles[tile].ult, gDP.tiles[tile].lrs, gDP.tiles[tile].lrt ); +#endif +} + +void gDPSetScissor( u32 mode, f32 ulx, f32 uly, f32 lrx, f32 lry ) +{ + gDP.scissor.mode = mode; + gDP.scissor.ulx = ulx; + gDP.scissor.uly = uly; + gDP.scissor.lrx = lrx; + gDP.scissor.lry = lry; + gDP.changed |= CHANGED_SCISSOR; + +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_IGNORED, "gDPSetScissor( %s, %.2f, %.2f, %.2f, %.2f );\n", + ScissorModeText[gDP.scissor.mode], + gDP.scissor.ulx, + gDP.scissor.uly, + gDP.scissor.lrx, + gDP.scissor.lry ); +#endif +} + +void gDPFillRectangle( s32 ulx, s32 uly, s32 lrx, s32 lry ) +{ + DepthBuffer *buffer = DepthBuffer_FindBuffer( gDP.colorImage.address ); + + if (buffer) + buffer->cleared = TRUE; + + if (gDP.depthImageAddress == gDP.colorImage.address) + { + OGL_ClearDepthBuffer(); + return; + } + + if (gDP.otherMode.cycleType == G_CYC_FILL) + { + lrx++; + lry++; + + if ((ulx == 0) && (uly == 0) && ((unsigned int)lrx == VI.width) && ((unsigned int)lry == VI.height)) + { + OGL_ClearColorBuffer( &gDP.fillColor.r ); + return; + } + } + + //shouldn't this be primitive color? + //OGL_DrawRect( ulx, uly, lrx, lry, (gDP.otherMode.cycleType == G_CYC_FILL) ? &gDP.fillColor.r : &gDP.blendColor.r ); + //OGL_DrawRect( ulx, uly, lrx, lry, (gDP.otherMode.cycleType == G_CYC_FILL) ? &gDP.fillColor.r : &gDP.primColor.r); + + float black[] = {0,0,0,0}; + OGL_DrawRect( ulx, uly, lrx, lry, (gDP.otherMode.cycleType == G_CYC_FILL) ? 
&gDP.fillColor.r : black); + + if (depthBuffer.current) depthBuffer.current->cleared = FALSE; + gDP.colorImage.changed = TRUE; + gDP.colorImage.height = max( gDP.colorImage.height, (unsigned int)lry ); + +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gDPFillRectangle( %i, %i, %i, %i );\n", + ulx, uly, lrx, lry ); +#endif +} + +void gDPSetConvert( s32 k0, s32 k1, s32 k2, s32 k3, s32 k4, s32 k5 ) +{ + gDP.convert.k0 = k0 * 0.0039215689f; + gDP.convert.k1 = k1 * 0.0039215689f; + gDP.convert.k2 = k2 * 0.0039215689f; + gDP.convert.k3 = k3 * 0.0039215689f; + gDP.convert.k4 = k4 * 0.0039215689f; + gDP.convert.k5 = k5 * 0.0039215689f; + gDP.changed |= CHANGED_CONVERT; +} + +void gDPSetKeyR( u32 cR, u32 sR, u32 wR ) +{ + gDP.key.center.r = cR * 0.0039215689f;; + gDP.key.scale.r = sR * 0.0039215689f;; + gDP.key.width.r = wR * 0.0039215689f;; +} + +void gDPSetKeyGB(u32 cG, u32 sG, u32 wG, u32 cB, u32 sB, u32 wB ) +{ + gDP.key.center.g = cG * 0.0039215689f;; + gDP.key.scale.g = sG * 0.0039215689f;; + gDP.key.width.g = wG * 0.0039215689f;; + gDP.key.center.b = cB * 0.0039215689f;; + gDP.key.scale.b = sB * 0.0039215689f;; + gDP.key.width.b = wB * 0.0039215689f;; +} + +void gDPTextureRectangle( f32 ulx, f32 uly, f32 lrx, f32 lry, s32 tile, f32 s, f32 t, f32 dsdx, f32 dtdy ) +{ + if (gDP.colorImage.address == gDP.depthImageAddress) + { + return; + } + + if (gDP.otherMode.cycleType == G_CYC_COPY) + { + dsdx = 1.0f; + lrx += 1.0f; + lry += 1.0f; + } + + gSP.textureTile[0] = &gDP.tiles[tile]; + gSP.textureTile[1] = &gDP.tiles[(tile < 7) ? 
(tile + 1) : tile]; + + + f32 lrs; + f32 lrt; + if (RSP.cmd == G_TEXRECTFLIP) + { + lrs = s + (lry - uly - 1) * dtdy; + lrt = t + (lrx - ulx - 1) * dsdx; + } + else + { + lrs = s + (lrx - ulx - 1) * dsdx; + lrt = t + (lry - uly - 1) * dtdy; + } + + if (gDP.textureMode == TEXTUREMODE_NORMAL) + gDP.textureMode = TEXTUREMODE_TEXRECT; + + gDP.texRect.width = (unsigned int)(max( lrs, s ) + dsdx); + gDP.texRect.height = (unsigned int)(max( lrt, t ) + dtdy); + + float tmp; + if (lrs < s) + { + tmp = ulx; ulx = lrx; lrx = tmp; + tmp = s; s = lrs; lrs = tmp; + } + if (lrt < t) + { + tmp = uly; uly = lry; lry = tmp; + tmp = t; t = lrt; lrt = tmp; + } + + OGL_DrawTexturedRect( ulx, uly, lrx, lry, s, t, lrs, lrt, (RSP.cmd == G_TEXRECTFLIP)); + + gSP.textureTile[0] = &gDP.tiles[gSP.texture.tile]; + gSP.textureTile[1] = &gDP.tiles[(gSP.texture.tile < 7) ? (gSP.texture.tile + 1) : gSP.texture.tile]; + + if (depthBuffer.current) depthBuffer.current->cleared = FALSE; + gDP.colorImage.changed = TRUE; + gDP.colorImage.height = (unsigned int)(max( gDP.colorImage.height, gDP.scissor.lry )); + +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gDPTextureRectangle( %f, %f, %f, %f, %i, %f, %f, %f, %f );\n", + ulx, uly, lrx, lry, tile, s, t, dsdx, dtdy ); +#endif +} + +void gDPTextureRectangleFlip( f32 ulx, f32 uly, f32 lrx, f32 lry, s32 tile, f32 s, f32 t, f32 dsdx, f32 dtdy ) +{ + //gDPTextureRectangle( ulx, uly, lrx, lry, tile, s + (lrx - ulx) * dsdx, t + (lry - uly) * dtdy, -dsdx, -dtdy ); + + gDPTextureRectangle( ulx, uly, lrx, lry, tile, s, t, dsdx, dtdy ); +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gDPTextureRectangleFlip( %f, %f, %f, %f, %i, %f, %f, %f, %f);\n", + ulx, uly, lrx, lry, tile, s, t, dsdx, dtdy ); +#endif +} + +void gDPFullSync() +{ + *REG.MI_INTR |= MI_INTR_DP; + + CheckInterrupts(); + +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gDPFullSync();\n" ); +#endif +} + +void gDPTileSync() +{ +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_IGNORED | 
DEBUG_TEXTURE, "gDPTileSync();\n" ); +#endif +} + +void gDPPipeSync() +{ +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_IGNORED, "gDPPipeSync();\n" ); +#endif +} + +void gDPLoadSync() +{ +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_IGNORED, "gDPLoadSync();\n" ); +#endif +} + +void gDPNoOp() +{ +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_IGNORED, "gDPNoOp();\n" ); +#endif +} + diff --git a/source/gles2n64/src/gDP.h b/source/gles2n64/src/gDP.h new file mode 100644 index 0000000..e3d8125 --- /dev/null +++ b/source/gles2n64/src/gDP.h @@ -0,0 +1,289 @@ +#ifndef GDP_H +#define GDP_H + +#include "Types.h" + +#define CHANGED_RENDERMODE 0x0001 +#define CHANGED_CYCLETYPE 0x0002 +#define CHANGED_SCISSOR 0x0004 +#define CHANGED_TMEM 0x0008 +#define CHANGED_TILE 0x0010 +#define CHANGED_COMBINE_COLORS 0x0020 +#define CHANGED_COMBINE 0x0040 +#define CHANGED_ALPHACOMPARE 0x0080 +#define CHANGED_FOGCOLOR 0x0100 +#define CHANGED_DEPTHSOURCE 0x0200 +#define CHANGED_PRIMITIVEZ 0x0400 +#define CHANGED_ENV_COLOR 0x0800 +#define CHANGED_PRIM_COLOR 0x1000 +#define CHANGED_BLENDCOLOR 0x2000 +#define CHANGED_CONVERT 0x4000 + +#define TEXTUREMODE_NORMAL 0 +#define TEXTUREMODE_TEXRECT 1 +#define TEXTUREMODE_BGIMAGE 2 +#define TEXTUREMODE_FRAMEBUFFER 3 + +#define LOADTYPE_BLOCK 0 +#define LOADTYPE_TILE 1 + +struct gDPCombine +{ + union + { + struct + { + // muxs1 + unsigned aA1 : 3; + unsigned sbA1 : 3; + unsigned aRGB1 : 3; + unsigned aA0 : 3; + unsigned sbA0 : 3; + unsigned aRGB0 : 3; + unsigned mA1 : 3; + unsigned saA1 : 3; + unsigned sbRGB1 : 4; + unsigned sbRGB0 : 4; + + // muxs0 + unsigned mRGB1 : 5; + unsigned saRGB1 : 4; + unsigned mA0 : 3; + unsigned saA0 : 3; + unsigned mRGB0 : 5; + unsigned saRGB0 : 4; + }; + + struct + { + u32 muxs1, muxs0; + }; + + u64 mux; + }; +}; + +struct gDPTile +{ + u32 format, size, line, tmem, palette; + + union + { + struct + { + unsigned mirrort : 1; + unsigned clampt : 1; + unsigned pad0 : 30; + + unsigned mirrors : 1; + unsigned clamps : 1; + unsigned 
pad1 : 30; + }; + + struct + { + u32 cmt, cms; + }; + }; + + //FrameBuffer *frameBuffer; + u32 maskt, masks; + u32 shiftt, shifts; + f32 fuls, fult, flrs, flrt; + u32 uls, ult, lrs, lrt; +}; + +struct gDPInfo +{ + struct + { + union + { + struct + { + unsigned int alphaCompare : 2; + unsigned int depthSource : 1; + +// struct +// { + unsigned int AAEnable : 1; + unsigned int depthCompare : 1; + unsigned int depthUpdate : 1; + unsigned int imageRead : 1; + unsigned int clearOnCvg : 1; + + unsigned int cvgDest : 2; + unsigned int depthMode : 2; + + unsigned int cvgXAlpha : 1; + unsigned int alphaCvgSel : 1; + unsigned int forceBlender : 1; + unsigned int textureEdge : 1; +// } renderMode; + + //struct + //{ + unsigned int c2_m2b : 2; + unsigned int c1_m2b : 2; + unsigned int c2_m2a : 2; + unsigned int c1_m2a : 2; + unsigned int c2_m1b : 2; + unsigned int c1_m1b : 2; + unsigned int c2_m1a : 2; + unsigned int c1_m1a : 2; + //} blender; + + unsigned int blendMask : 4; + unsigned int alphaDither : 2; + unsigned int colorDither : 2; + + unsigned int combineKey : 1; + unsigned int textureConvert : 3; + unsigned int textureFilter : 2; + unsigned int textureLUT : 2; + + unsigned int textureLOD : 1; + unsigned int textureDetail : 2; + unsigned int texturePersp : 1; + unsigned int cycleType : 2; + unsigned int unusedColorDither : 1; // unsupported + unsigned int pipelineMode : 1; + + unsigned int pad : 8; + + }; + + u64 _u64; + + struct + { + u32 l, h; + }; + }; + } otherMode; + + gDPCombine combine; + + gDPTile tiles[8], *loadTile; + + struct + { + f32 r, g, b, a; + } fogColor, blendColor, envColor; + + struct + { + unsigned int i; + f32 r, g, b, a; + f32 z, dz; + } fillColor; + + struct + { + u32 m; + f32 l, r, g, b, a; + } primColor; + + struct + { + f32 z, deltaZ; + } primDepth; + + struct + { + u32 format, size, width, bpl; + u32 address; + } textureImage; + + struct + { + u32 format, size, width, height, bpl; + u32 address, changed; + u32 depthImage; + } colorImage; + + 
u32 depthImageAddress; + + struct + { + u32 mode; + f32 ulx, uly, lrx, lry; + } scissor; + + struct + { + f32 k0, k1, k2, k3, k4, k5; + } convert; + + struct + { + struct + { + f32 r, g, b, a; + } center, scale, width; + } key; + + struct + { + u32 width, height; + } texRect; + + u32 changed; + + //u16 palette[256]; + u32 paletteCRC16[16]; + u32 paletteCRC256; + u32 half_1, half_2; + u32 textureMode; + u32 loadType; +}; + +extern gDPInfo gDP; + +void gDPSetOtherMode( u32 mode0, u32 mode1 ); +void gDPSetPrimDepth( u16 z, u16 dz ); +void gDPPipelineMode( u32 mode ); +void gDPSetCycleType( u32 type ); +void gDPSetTexturePersp( u32 enable ); +void gDPSetTextureDetail( u32 type ); +void gDPSetTextureLOD( u32 mode ); +void gDPSetTextureLUT( u32 mode ); +void gDPSetTextureFilter( u32 type ); +void gDPSetTextureConvert( u32 type ); +void gDPSetCombineKey( u32 type ); +void gDPSetColorDither( u32 type ); +void gDPSetAlphaDither( u32 type ); +void gDPSetAlphaCompare( u32 mode ); +void gDPSetDepthSource( u32 source ); +void gDPSetRenderMode( u32 mode1, u32 mode2 ); +void gDPSetCombine( s32 muxs0, s32 muxs1 ); +void gDPSetColorImage( u32 format, u32 size, u32 width, u32 address ); +void gDPSetTextureImage( u32 format, u32 size, u32 width, u32 address ); +void gDPSetDepthImage( u32 address ); +void gDPSetEnvColor( u32 r, u32 g, u32 b, u32 a ); +void gDPSetBlendColor( u32 r, u32 g, u32 b, u32 a ); +void gDPSetFogColor( u32 r, u32 g, u32 b, u32 a ); +void gDPSetFillColor( u32 c ); +void gDPSetPrimColor( u32 m, u32 l, u32 r, u32 g, u32 b, u32 a ); +void gDPSetTile(u32 format, const u32 size, const u32 line, const u32 tmem, u32 tile, + const u32 palette, const u32 cmt, const u32 cms, const u32 maskt, const u32 masks, + const u32 shiftt, const u32 shifts ); +void gDPSetTileSize( u32 tile, u32 uls, u32 ult, u32 lrs, u32 lrt ); +void gDPLoadTile( u32 tile, u32 uls, u32 ult, u32 lrs, u32 lrt ); +void gDPLoadBlock( u32 tile, u32 uls, u32 ult, u32 lrs, u32 dxt ); +void gDPLoadTLUT( u32 
tile, u32 uls, u32 ult, u32 lrs, u32 lrt ); +void gDPSetScissor( u32 mode, f32 ulx, f32 uly, f32 lrx, f32 lry ); +void gDPFillRectangle( s32 ulx, s32 uly, s32 lrx, s32 lry ); +void gDPSetConvert( s32 k0, s32 k1, s32 k2, s32 k3, s32 k4, s32 k5 ); +void gDPSetKeyR( u32 cR, u32 sR, u32 wR ); +void gDPSetKeyGB(u32 cG, u32 sG, u32 wG, u32 cB, u32 sB, u32 wB ); +void gDPTextureRectangle( f32 ulx, f32 uly, f32 lrx, f32 lry, s32 tile, f32 s, f32 t, f32 dsdx, f32 dtdy ); +void gDPTextureRectangleFlip( f32 ulx, f32 uly, f32 lrx, f32 lry, s32 tile, f32 s, f32 t, f32 dsdx, f32 dtdy ); +void gDPFullSync(); +void gDPTileSync(); +void gDPPipeSync(); +void gDPLoadSync(); +void gDPNoOp(); + +#endif + diff --git a/source/gles2n64/src/gSP.cpp b/source/gles2n64/src/gSP.cpp new file mode 100644 index 0000000..2b845f9 --- /dev/null +++ b/source/gles2n64/src/gSP.cpp @@ -0,0 +1,1738 @@ +#include +#include + +#include "Common.h" +#include "gles2N64.h" +#include "Debug.h" +#include "Types.h" +#include "RSP.h" +#include "GBI.h" +#include "gSP.h" +#include "gDP.h" +#include "3DMath.h" +#include "OpenGL.h" +#include "CRC.h" +#include +#include "convert.h" +#include "S2DEX.h" +#include "VI.h" +#include "DepthBuffer.h" +#include "Config.h" + +//Note: 0xC0 is used by 1080 alot, its an unknown command. + +#ifdef DEBUG +extern u32 uc_crc, uc_dcrc; +extern char uc_str[256]; +#endif + +void gSPCombineMatrices(); + +//#ifdef __TRIBUFFER_OPT +void __indexmap_init() +{ + memset(OGL.triangles.indexmapinv, 0xFF, VERTBUFF_SIZE*sizeof(u32)); + for(int i=0;i= VERTBUFF_SIZE) {i=0; c=0;} + n++; + } + return (c == num) ? (i-num+1) : (0xFFFFFFFF); +} + +void __indexmap_undomap() +{ + SPVertex tmp[INDEXMAP_SIZE]; + memset(OGL.triangles.indexmapinv, 0xFF, VERTBUFF_SIZE * sizeof(u32)); + + for(int i=0;i VERTBUFF_SIZE) + { + OGL_DrawTriangles(); + ind = __indexmap_findunused(num); + + //OK the indices are spread so sparsely, we cannot find a num element block. 
+ if (ind > VERTBUFF_SIZE) + { + __indexmap_undomap(); + ind = __indexmap_findunused(num); + if (ind > VERTBUFF_SIZE) + { + LOG(LOG_ERROR, "Could not allocate %i indices\n", num); + + LOG(LOG_VERBOSE, "indexmap=["); + for(int i=0;icleared = FALSE; + gDP.colorImage.changed = TRUE; + gDP.colorImage.height = (unsigned int)(max( gDP.colorImage.height, gDP.scissor.lry )); +} + +void gSP1Triangle( const s32 v0, const s32 v1, const s32 v2) +{ + gSPTriangle( v0, v1, v2); + gSPFlushTriangles(); +} + +void gSP2Triangles(const s32 v00, const s32 v01, const s32 v02, const s32 flag0, + const s32 v10, const s32 v11, const s32 v12, const s32 flag1 ) +{ + gSPTriangle( v00, v01, v02); + gSPTriangle( v10, v11, v12); + gSPFlushTriangles(); +} + +void gSP4Triangles(const s32 v00, const s32 v01, const s32 v02, + const s32 v10, const s32 v11, const s32 v12, + const s32 v20, const s32 v21, const s32 v22, + const s32 v30, const s32 v31, const s32 v32 ) +{ + gSPTriangle(v00, v01, v02); + gSPTriangle(v10, v11, v12); + gSPTriangle(v20, v21, v22); + gSPTriangle(v30, v31, v32); + gSPFlushTriangles(); +} + + +gSPInfo gSP; + +f32 identityMatrix[4][4] = +{ + { 1.0f, 0.0f, 0.0f, 0.0f }, + { 0.0f, 1.0f, 0.0f, 0.0f }, + { 0.0f, 0.0f, 1.0f, 0.0f }, + { 0.0f, 0.0f, 0.0f, 1.0f } +}; + +#ifdef __VEC4_OPT +static void gSPTransformVertex4_default(u32 v, float mtx[4][4]) +{ + float x, y, z, w; + int i; + for(i = 0; i < 4; i++) + { + x = OGL.triangles.vertices[v+i].x; + y = OGL.triangles.vertices[v+i].y; + z = OGL.triangles.vertices[v+i].z; + w = OGL.triangles.vertices[v+i].w; + OGL.triangles.vertices[v+i].x = x * mtx[0][0] + y * mtx[1][0] + z * mtx[2][0] + mtx[3][0]; + OGL.triangles.vertices[v+i].y = x * mtx[0][1] + y * mtx[1][1] + z * mtx[2][1] + mtx[3][1]; + OGL.triangles.vertices[v+i].z = x * mtx[0][2] + y * mtx[1][2] + z * mtx[2][2] + mtx[3][2]; + OGL.triangles.vertices[v+i].w = x * mtx[0][3] + y * mtx[1][3] + z * mtx[2][3] + mtx[3][3]; + } +} + +void gSPClipVertex4(u32 v) +{ + int i; + for(i = 0; i < 
4; i++){ + SPVertex *vtx = &OGL.triangles.vertices[v+i]; + vtx->clip = 0; + if (vtx->x > +vtx->w) vtx->clip |= CLIP_POSX; + if (vtx->x < -vtx->w) vtx->clip |= CLIP_NEGX; + if (vtx->y > +vtx->w) vtx->clip |= CLIP_POSY; + if (vtx->y < -vtx->w) vtx->clip |= CLIP_NEGY; + } +} + +static void gSPTransformNormal4_default(u32 v, float mtx[4][4]) +{ + float len, x, y, z; + int i; + for(i = 0; i < 4; i++){ + x = OGL.triangles.vertices[v+i].nx; + y = OGL.triangles.vertices[v+i].ny; + z = OGL.triangles.vertices[v+i].nz; + + OGL.triangles.vertices[v+i].nx = mtx[0][0]*x + mtx[1][0]*y + mtx[2][0]*z; + OGL.triangles.vertices[v+i].ny = mtx[0][1]*x + mtx[1][1]*y + mtx[2][1]*z; + OGL.triangles.vertices[v+i].nz = mtx[0][2]*x + mtx[1][2]*y + mtx[2][2]*z; + len = OGL.triangles.vertices[v+i].nx*OGL.triangles.vertices[v+i].nx + + OGL.triangles.vertices[v+i].ny*OGL.triangles.vertices[v+i].ny + + OGL.triangles.vertices[v+i].nz*OGL.triangles.vertices[v+i].nz; + if (len != 0.0) + { + len = sqrtf(len); + OGL.triangles.vertices[v+i].nx /= len; + OGL.triangles.vertices[v+i].ny /= len; + OGL.triangles.vertices[v+i].nz /= len; + } + } +} + +static void gSPLightVertex4_default(u32 v) +{ + gSPTransformNormal4(v, gSP.matrix.modelView[gSP.matrix.modelViewi]); + for(int j = 0; j < 4; j++) + { + f32 r,g,b; + r = gSP.lights[gSP.numLights].r; + g = gSP.lights[gSP.numLights].g; + b = gSP.lights[gSP.numLights].b; + + for (int i = 0; i < gSP.numLights; i++) + { + f32 intensity = DotProduct( &OGL.triangles.vertices[v+j].nx, &gSP.lights[i].x ); + if (intensity < 0.0f) intensity = 0.0f; +/* +// paulscode, cause of the shader bug (not applying intensity to correct varriables) + OGL.triangles.vertices[v+j].r += gSP.lights[i].r * intensity; + OGL.triangles.vertices[v+j].g += gSP.lights[i].g * intensity; + OGL.triangles.vertices[v+j].b += gSP.lights[i].b * intensity; +*/ +//// paulscode, shader bug-fix: + r += gSP.lights[i].r * intensity; + g += gSP.lights[i].g * intensity; + b += gSP.lights[i].b * intensity; +//// 
+ } + OGL.triangles.vertices[v+j].r = min(1.0f, r); + OGL.triangles.vertices[v+j].g = min(1.0f, g); + OGL.triangles.vertices[v+j].b = min(1.0f, b); + } +} + +static void gSPBillboardVertex4_default(u32 v) +{ + + int i = 0; +#ifdef __TRIBUFFER_OPT + i = OGL.triangles.indexmap[0]; +#endif + + OGL.triangles.vertices[v].x += OGL.triangles.vertices[i].x; + OGL.triangles.vertices[v].y += OGL.triangles.vertices[i].y; + OGL.triangles.vertices[v].z += OGL.triangles.vertices[i].z; + OGL.triangles.vertices[v].w += OGL.triangles.vertices[i].w; + OGL.triangles.vertices[v+1].x += OGL.triangles.vertices[i].x; + OGL.triangles.vertices[v+1].y += OGL.triangles.vertices[i].y; + OGL.triangles.vertices[v+1].z += OGL.triangles.vertices[i].z; + OGL.triangles.vertices[v+1].w += OGL.triangles.vertices[i].w; + OGL.triangles.vertices[v+2].x += OGL.triangles.vertices[i].x; + OGL.triangles.vertices[v+2].y += OGL.triangles.vertices[i].y; + OGL.triangles.vertices[v+2].z += OGL.triangles.vertices[i].z; + OGL.triangles.vertices[v+2].w += OGL.triangles.vertices[i].w; + OGL.triangles.vertices[v+3].x += OGL.triangles.vertices[i].x; + OGL.triangles.vertices[v+3].y += OGL.triangles.vertices[i].y; + OGL.triangles.vertices[v+3].z += OGL.triangles.vertices[i].z; + OGL.triangles.vertices[v+3].w += OGL.triangles.vertices[i].w; +} + +void gSPProcessVertex4(u32 v) +{ + if (gSP.changed & CHANGED_MATRIX) + gSPCombineMatrices(); + + gSPTransformVertex4(v, gSP.matrix.combined ); + + if (config.screen.flipVertical) + { + OGL.triangles.vertices[v+0].y = -OGL.triangles.vertices[v+0].y; + OGL.triangles.vertices[v+1].y = -OGL.triangles.vertices[v+1].y; + OGL.triangles.vertices[v+2].y = -OGL.triangles.vertices[v+2].y; + OGL.triangles.vertices[v+3].y = -OGL.triangles.vertices[v+3].y; + } + + if (gDP.otherMode.depthSource) + { + OGL.triangles.vertices[v+0].z = gDP.primDepth.z * OGL.triangles.vertices[v+0].w; + OGL.triangles.vertices[v+1].z = gDP.primDepth.z * OGL.triangles.vertices[v+1].w; + OGL.triangles.vertices[v+2].z 
= gDP.primDepth.z * OGL.triangles.vertices[v+2].w; + OGL.triangles.vertices[v+3].z = gDP.primDepth.z * OGL.triangles.vertices[v+3].w; + } + + if (gSP.matrix.billboard) + gSPBillboardVertex4(v); + + if (!(gSP.geometryMode & G_ZBUFFER)) + { + OGL.triangles.vertices[v].z = -OGL.triangles.vertices[v].w; + OGL.triangles.vertices[v+1].z = -OGL.triangles.vertices[v+1].w; + OGL.triangles.vertices[v+2].z = -OGL.triangles.vertices[v+2].w; + OGL.triangles.vertices[v+3].z = -OGL.triangles.vertices[v+3].w; + } + + if (gSP.geometryMode & G_LIGHTING) + { + if (config.enableLighting) + { + gSPLightVertex4(v); + } + else + { + OGL.triangles.vertices[v].r = 1.0f; + OGL.triangles.vertices[v].g = 1.0f; + OGL.triangles.vertices[v].b = 1.0f; + OGL.triangles.vertices[v+1].r = 1.0f; + OGL.triangles.vertices[v+1].g = 1.0f; + OGL.triangles.vertices[v+1].b = 1.0f; + OGL.triangles.vertices[v+2].r = 1.0f; + OGL.triangles.vertices[v+2].g = 1.0f; + OGL.triangles.vertices[v+2].b = 1.0f; + OGL.triangles.vertices[v+3].r = 1.0f; + OGL.triangles.vertices[v+3].g = 1.0f; + OGL.triangles.vertices[v+3].b = 1.0f; + } + + if (gSP.geometryMode & G_TEXTURE_GEN) + { + gSPTransformNormal4(v, gSP.matrix.projection); + + if (gSP.geometryMode & G_TEXTURE_GEN_LINEAR) + { + OGL.triangles.vertices[v].s = acosf(OGL.triangles.vertices[v].nx) * 325.94931f; + OGL.triangles.vertices[v].t = acosf(OGL.triangles.vertices[v].ny) * 325.94931f; + OGL.triangles.vertices[v+1].s = acosf(OGL.triangles.vertices[v+1].nx) * 325.94931f; + OGL.triangles.vertices[v+1].t = acosf(OGL.triangles.vertices[v+1].ny) * 325.94931f; + OGL.triangles.vertices[v+2].s = acosf(OGL.triangles.vertices[v+2].nx) * 325.94931f; + OGL.triangles.vertices[v+2].t = acosf(OGL.triangles.vertices[v+2].ny) * 325.94931f; + OGL.triangles.vertices[v+3].s = acosf(OGL.triangles.vertices[v+3].nx) * 325.94931f; + OGL.triangles.vertices[v+3].t = acosf(OGL.triangles.vertices[v+3].ny) * 325.94931f; + } + else // G_TEXTURE_GEN + { + OGL.triangles.vertices[v].s = 
(OGL.triangles.vertices[v].nx + 1.0f) * 512.0f; + OGL.triangles.vertices[v].t = (OGL.triangles.vertices[v].ny + 1.0f) * 512.0f; + OGL.triangles.vertices[v+1].s = (OGL.triangles.vertices[v+1].nx + 1.0f) * 512.0f; + OGL.triangles.vertices[v+1].t = (OGL.triangles.vertices[v+1].ny + 1.0f) * 512.0f; + OGL.triangles.vertices[v+2].s = (OGL.triangles.vertices[v+2].nx + 1.0f) * 512.0f; + OGL.triangles.vertices[v+2].t = (OGL.triangles.vertices[v+2].ny + 1.0f) * 512.0f; + OGL.triangles.vertices[v+3].s = (OGL.triangles.vertices[v+3].nx + 1.0f) * 512.0f; + OGL.triangles.vertices[v+3].t = (OGL.triangles.vertices[v+3].ny + 1.0f) * 512.0f; + } + } + } + + if (config.enableClipping) gSPClipVertex4(v); +} +#endif + +void gSPClipVertex(u32 v) +{ + SPVertex *vtx = &OGL.triangles.vertices[v]; + vtx->clip = 0; + if (vtx->x > +vtx->w) vtx->clip |= CLIP_POSX; + if (vtx->x < -vtx->w) vtx->clip |= CLIP_NEGX; + if (vtx->y > +vtx->w) vtx->clip |= CLIP_POSY; + if (vtx->y < -vtx->w) vtx->clip |= CLIP_NEGY; + //if (vtx->w < 0.1f) vtx->clip |= CLIP_NEGW; +} + +static void gSPTransformVertex_default(float vtx[4], float mtx[4][4]) +{ + float x, y, z, w; + x = vtx[0]; + y = vtx[1]; + z = vtx[2]; + w = vtx[3]; + + vtx[0] = x * mtx[0][0] + y * mtx[1][0] + z * mtx[2][0] + mtx[3][0]; + vtx[1] = x * mtx[0][1] + y * mtx[1][1] + z * mtx[2][1] + mtx[3][1]; + vtx[2] = x * mtx[0][2] + y * mtx[1][2] + z * mtx[2][2] + mtx[3][2]; + vtx[3] = x * mtx[0][3] + y * mtx[1][3] + z * mtx[2][3] + mtx[3][3]; +} + +static void gSPLightVertex_default(u32 v) +{ + TransformVectorNormalize( &OGL.triangles.vertices[v].nx, gSP.matrix.modelView[gSP.matrix.modelViewi] ); + + f32 r, g, b; + r = gSP.lights[gSP.numLights].r; + g = gSP.lights[gSP.numLights].g; + b = gSP.lights[gSP.numLights].b; + for (int i = 0; i < gSP.numLights; i++) + { + f32 intensity = DotProduct( &OGL.triangles.vertices[v].nx, &gSP.lights[i].x ); + if (intensity < 0.0f) intensity = 0.0f; + r += gSP.lights[i].r * intensity; + g += gSP.lights[i].g * intensity; + b 
+= gSP.lights[i].b * intensity; + } + OGL.triangles.vertices[v].r = min(1.0, r); + OGL.triangles.vertices[v].g = min(1.0, g); + OGL.triangles.vertices[v].b = min(1.0, b); +} + +static void gSPBillboardVertex_default(u32 v, u32 i) +{ + OGL.triangles.vertices[v].x += OGL.triangles.vertices[i].x; + OGL.triangles.vertices[v].y += OGL.triangles.vertices[i].y; + OGL.triangles.vertices[v].z += OGL.triangles.vertices[i].z; + OGL.triangles.vertices[v].w += OGL.triangles.vertices[i].w; +} + +void gSPCombineMatrices() +{ + MultMatrix(gSP.matrix.projection, gSP.matrix.modelView[gSP.matrix.modelViewi], gSP.matrix.combined); + gSP.changed &= ~CHANGED_MATRIX; +} + +void gSPProcessVertex( u32 v ) +{ + f32 intensity; + f32 r, g, b; + + if (gSP.changed & CHANGED_MATRIX) + gSPCombineMatrices(); + + gSPTransformVertex( &OGL.triangles.vertices[v].x, gSP.matrix.combined ); + + if (config.screen.flipVertical) + { + OGL.triangles.vertices[v].y = -OGL.triangles.vertices[v].y; + } + + if (gDP.otherMode.depthSource) + { + OGL.triangles.vertices[v].z = gDP.primDepth.z * OGL.triangles.vertices[v].w; + } + + if (gSP.matrix.billboard) + { + int i = 0; +#ifdef __TRIBUFFER_OPT + i = OGL.triangles.indexmap[0]; +#endif + + gSPBillboardVertex(v, i); + } + + if (!(gSP.geometryMode & G_ZBUFFER)) + { + OGL.triangles.vertices[v].z = -OGL.triangles.vertices[v].w; + } + + if (config.enableClipping) + gSPClipVertex(v); + + if (gSP.geometryMode & G_LIGHTING) + { + if (config.enableLighting) + { + gSPLightVertex(v); + } + else + { + OGL.triangles.vertices[v].r = 1.0f; + OGL.triangles.vertices[v].g = 1.0f; + OGL.triangles.vertices[v].b = 1.0f; + } + + if (gSP.geometryMode & G_TEXTURE_GEN) + { + TransformVectorNormalize(&OGL.triangles.vertices[v].nx, gSP.matrix.projection); + + if (gSP.geometryMode & G_TEXTURE_GEN_LINEAR) + { + OGL.triangles.vertices[v].s = acosf(OGL.triangles.vertices[v].nx) * 325.94931f; + OGL.triangles.vertices[v].t = acosf(OGL.triangles.vertices[v].ny) * 325.94931f; + } + else // 
G_TEXTURE_GEN + { + OGL.triangles.vertices[v].s = (OGL.triangles.vertices[v].nx + 1.0f) * 512.0f; + OGL.triangles.vertices[v].t = (OGL.triangles.vertices[v].ny + 1.0f) * 512.0f; + } + } + } +} + + +void gSPLoadUcodeEx( u32 uc_start, u32 uc_dstart, u16 uc_dsize ) +{ + RSP.PCi = 0; + gSP.matrix.modelViewi = 0; + gSP.changed |= CHANGED_MATRIX; + gSP.status[0] = gSP.status[1] = gSP.status[2] = gSP.status[3] = 0; + + if ((((uc_start & 0x1FFFFFFF) + 4096) > RDRAMSize) || (((uc_dstart & 0x1FFFFFFF) + uc_dsize) > RDRAMSize)) + { + return; + } + + MicrocodeInfo *ucode = GBI_DetectMicrocode( uc_start, uc_dstart, uc_dsize ); + + if (ucode->type != 0xFFFFFFFF) + last_good_ucode = ucode->type; + + if (ucode->type != NONE) + { + GBI_MakeCurrent( ucode ); + } + else + { + LOG(LOG_WARNING, "Unknown Ucode\n"); + } +} + +void gSPNoOp() +{ + gSPFlushTriangles(); +} + +void gSPTriangleUnknown() +{ +#ifdef __TRIBUFFER_OPT + gSPFlushTriangles(); +#endif +} + +void gSPMatrix( u32 matrix, u8 param ) +{ +#ifdef __TRIBUFFER_OPT + gSPFlushTriangles(); +#endif + + f32 mtx[4][4]; + u32 address = RSP_SegmentToPhysical( matrix ); + + if (address + 64 > RDRAMSize) + { + return; + } + + RSP_LoadMatrix( mtx, address ); + + if (param & G_MTX_PROJECTION) + { + if (param & G_MTX_LOAD) + CopyMatrix( gSP.matrix.projection, mtx ); + else + MultMatrix2( gSP.matrix.projection, mtx ); + } + else + { + if ((param & G_MTX_PUSH) && (gSP.matrix.modelViewi < (gSP.matrix.stackSize - 1))) + { + CopyMatrix( gSP.matrix.modelView[gSP.matrix.modelViewi + 1], gSP.matrix.modelView[gSP.matrix.modelViewi] ); + gSP.matrix.modelViewi++; + } + if (param & G_MTX_LOAD) + CopyMatrix( gSP.matrix.modelView[gSP.matrix.modelViewi], mtx ); + else + MultMatrix2( gSP.matrix.modelView[gSP.matrix.modelViewi], mtx ); + } + + gSP.changed |= CHANGED_MATRIX; +} + +void gSPDMAMatrix( u32 matrix, u8 index, u8 multiply ) +{ + f32 mtx[4][4]; + u32 address = gSP.DMAOffsets.mtx + RSP_SegmentToPhysical( matrix ); + + if (address + 64 > RDRAMSize) 
+ { + return; + } + + RSP_LoadMatrix( mtx, address ); + + gSP.matrix.modelViewi = index; + + if (multiply) + { + //CopyMatrix( gSP.matrix.modelView[gSP.matrix.modelViewi], gSP.matrix.modelView[0] ); + //MultMatrix( gSP.matrix.modelView[gSP.matrix.modelViewi], mtx ); + MultMatrix(gSP.matrix.modelView[0], mtx, gSP.matrix.modelView[gSP.matrix.modelViewi]); + } + else + CopyMatrix( gSP.matrix.modelView[gSP.matrix.modelViewi], mtx ); + + CopyMatrix( gSP.matrix.projection, identityMatrix ); + gSP.changed |= CHANGED_MATRIX; +} + +void gSPViewport( u32 v ) +{ + u32 address = RSP_SegmentToPhysical( v ); + + if ((address + 16) > RDRAMSize) + { +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_ERROR, "// Attempting to load viewport from invalid address\n" ); + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gSPViewport( 0x%08X );\n", v ); +#endif + return; + } + + gSP.viewport.vscale[0] = _FIXED2FLOAT( *(s16*)&RDRAM[address + 2], 2 ); + gSP.viewport.vscale[1] = _FIXED2FLOAT( *(s16*)&RDRAM[address ], 2 ); + gSP.viewport.vscale[2] = _FIXED2FLOAT( *(s16*)&RDRAM[address + 6], 10 );// * 0.00097847357f; + gSP.viewport.vscale[3] = *(s16*)&RDRAM[address + 4]; + gSP.viewport.vtrans[0] = _FIXED2FLOAT( *(s16*)&RDRAM[address + 10], 2 ); + gSP.viewport.vtrans[1] = _FIXED2FLOAT( *(s16*)&RDRAM[address + 8], 2 ); + gSP.viewport.vtrans[2] = _FIXED2FLOAT( *(s16*)&RDRAM[address + 14], 10 );// * 0.00097847357f; + gSP.viewport.vtrans[3] = *(s16*)&RDRAM[address + 12]; + + gSP.viewport.x = gSP.viewport.vtrans[0] - gSP.viewport.vscale[0]; + gSP.viewport.y = gSP.viewport.vtrans[1] - gSP.viewport.vscale[1]; + gSP.viewport.width = gSP.viewport.vscale[0] * 2; + gSP.viewport.height = gSP.viewport.vscale[1] * 2; + gSP.viewport.nearz = gSP.viewport.vtrans[2] - gSP.viewport.vscale[2]; + gSP.viewport.farz = (gSP.viewport.vtrans[2] + gSP.viewport.vscale[2]) ; + + gSP.changed |= CHANGED_VIEWPORT; +} + +void gSPForceMatrix( u32 mptr ) +{ + u32 address = RSP_SegmentToPhysical( mptr ); + + if (address + 64 > RDRAMSize) + { + 
return; + } + + RSP_LoadMatrix( gSP.matrix.combined, RSP_SegmentToPhysical( mptr ) ); + + gSP.changed &= ~CHANGED_MATRIX; +} + +void gSPLight( u32 l, s32 n ) +{ + n--; + if (n >= 8) + return; + + u32 address = RSP_SegmentToPhysical( l ); + + if ((address + sizeof( Light )) > RDRAMSize) + { + return; + } + + u8 *addr = &RDRAM[address]; + + if (config.hackZelda && (addr[0] == 0x08) && (addr[4] == 0xFF)) + { + LightMM *light = (LightMM*)addr; + gSP.lights[n].r = light->r * 0.0039215689f; + gSP.lights[n].g = light->g * 0.0039215689f; + gSP.lights[n].b = light->b * 0.0039215689f; + gSP.lights[n].x = light->x; + gSP.lights[n].y = light->y; + gSP.lights[n].z = light->z; + } + else + { + Light *light = (Light*)addr; + gSP.lights[n].r = light->r * 0.0039215689f; + gSP.lights[n].g = light->g * 0.0039215689f; + gSP.lights[n].b = light->b * 0.0039215689f; + gSP.lights[n].x = light->x; + gSP.lights[n].y = light->y; + gSP.lights[n].z = light->z; + } + Normalize(&gSP.lights[n].x); +} + +void gSPLookAt( u32 l ) +{ +} + +void gSPVertex( u32 v, u32 n, u32 v0 ) +{ + //flush batched triangles: +#ifdef __TRIBUFFER_OPT + gSPFlushTriangles(); +#endif + + u32 address = RSP_SegmentToPhysical( v ); + + if ((address + sizeof( Vertex ) * n) > RDRAMSize) + { + return; + } + + Vertex *vertex = (Vertex*)&RDRAM[address]; + + if ((n + v0) <= INDEXMAP_SIZE) + { + unsigned int i = v0; +#ifdef __VEC4_OPT + for (; i < n - (n%4) + v0; i += 4) + { + u32 v = i; +#ifdef __TRIBUFFER_OPT + v = __indexmap_getnew(v, 4); +#endif + for(int j = 0; j < 4; j++) + { + OGL.triangles.vertices[v+j].x = vertex->x; + OGL.triangles.vertices[v+j].y = vertex->y; + OGL.triangles.vertices[v+j].z = vertex->z; + //OGL.triangles.vertices[i+j].flag = vertex->flag; + OGL.triangles.vertices[v+j].s = _FIXED2FLOAT( vertex->s, 5 ); + OGL.triangles.vertices[v+j].t = _FIXED2FLOAT( vertex->t, 5 ); + if (gSP.geometryMode & G_LIGHTING) + { + OGL.triangles.vertices[v+j].nx = vertex->normal.x; + OGL.triangles.vertices[v+j].ny = 
vertex->normal.y; + OGL.triangles.vertices[v+j].nz = vertex->normal.z; + OGL.triangles.vertices[v+j].a = vertex->color.a * 0.0039215689f; + } + else + { + OGL.triangles.vertices[v+j].r = vertex->color.r * 0.0039215689f; + OGL.triangles.vertices[v+j].g = vertex->color.g * 0.0039215689f; + OGL.triangles.vertices[v+j].b = vertex->color.b * 0.0039215689f; + OGL.triangles.vertices[v+j].a = vertex->color.a * 0.0039215689f; + } + vertex++; + } + gSPProcessVertex4(v); + } +#endif + for (; i < n + v0; i++) + { + u32 v = i; +#ifdef __TRIBUFFER_OPT + v = __indexmap_getnew(v, 1); +#endif + OGL.triangles.vertices[v].x = vertex->x; + OGL.triangles.vertices[v].y = vertex->y; + OGL.triangles.vertices[v].z = vertex->z; + OGL.triangles.vertices[v].s = _FIXED2FLOAT( vertex->s, 5 ); + OGL.triangles.vertices[v].t = _FIXED2FLOAT( vertex->t, 5 ); + if (gSP.geometryMode & G_LIGHTING) + { + OGL.triangles.vertices[v].nx = vertex->normal.x; + OGL.triangles.vertices[v].ny = vertex->normal.y; + OGL.triangles.vertices[v].nz = vertex->normal.z; + OGL.triangles.vertices[v].a = vertex->color.a * 0.0039215689f; + } + else + { + OGL.triangles.vertices[v].r = vertex->color.r * 0.0039215689f; + OGL.triangles.vertices[v].g = vertex->color.g * 0.0039215689f; + OGL.triangles.vertices[v].b = vertex->color.b * 0.0039215689f; + OGL.triangles.vertices[v].a = vertex->color.a * 0.0039215689f; + } + gSPProcessVertex(v); + vertex++; + } + } + else + { + LOG(LOG_ERROR, "Using Vertex outside buffer v0=%i, n=%i\n", v0, n); + } + +} + +void gSPCIVertex( u32 v, u32 n, u32 v0 ) +{ + +#ifdef __TRIBUFFER_OPT + gSPFlushTriangles(); +#endif + + u32 address = RSP_SegmentToPhysical( v ); + + if ((address + sizeof( PDVertex ) * n) > RDRAMSize) + { + return; + } + + PDVertex *vertex = (PDVertex*)&RDRAM[address]; + + if ((n + v0) <= INDEXMAP_SIZE) + { + unsigned int i = v0; +#ifdef __VEC4_OPT + for (; i < n - (n%4) + v0; i += 4) + { + u32 v = i; +#ifdef __TRIBUFFER_OPT + v = __indexmap_getnew(v, 4); +#endif + for(unsigned int 
j = 0; j < 4; j++) + { + OGL.triangles.vertices[v+j].x = vertex->x; + OGL.triangles.vertices[v+j].y = vertex->y; + OGL.triangles.vertices[v+j].z = vertex->z; + OGL.triangles.vertices[v+j].s = _FIXED2FLOAT( vertex->s, 5 ); + OGL.triangles.vertices[v+j].t = _FIXED2FLOAT( vertex->t, 5 ); + u8 *color = &RDRAM[gSP.vertexColorBase + (vertex->ci & 0xff)]; + + if (gSP.geometryMode & G_LIGHTING) + { + OGL.triangles.vertices[v+j].nx = (s8)color[3]; + OGL.triangles.vertices[v+j].ny = (s8)color[2]; + OGL.triangles.vertices[v+j].nz = (s8)color[1]; + OGL.triangles.vertices[v+j].a = color[0] * 0.0039215689f; + } + else + { + OGL.triangles.vertices[v+j].r = color[3] * 0.0039215689f; + OGL.triangles.vertices[v+j].g = color[2] * 0.0039215689f; + OGL.triangles.vertices[v+j].b = color[1] * 0.0039215689f; + OGL.triangles.vertices[v+j].a = color[0] * 0.0039215689f; + } + vertex++; + } + gSPProcessVertex4(v); + } +#endif + for(; i < n + v0; i++) + { + u32 v = i; +#ifdef __TRIBUFFER_OPT + v = __indexmap_getnew(v, 1); +#endif + OGL.triangles.vertices[v].x = vertex->x; + OGL.triangles.vertices[v].y = vertex->y; + OGL.triangles.vertices[v].z = vertex->z; + OGL.triangles.vertices[v].s = _FIXED2FLOAT( vertex->s, 5 ); + OGL.triangles.vertices[v].t = _FIXED2FLOAT( vertex->t, 5 ); + u8 *color = &RDRAM[gSP.vertexColorBase + (vertex->ci & 0xff)]; + + if (gSP.geometryMode & G_LIGHTING) + { + OGL.triangles.vertices[v].nx = (s8)color[3]; + OGL.triangles.vertices[v].ny = (s8)color[2]; + OGL.triangles.vertices[v].nz = (s8)color[1]; + OGL.triangles.vertices[v].a = color[0] * 0.0039215689f; + } + else + { + OGL.triangles.vertices[v].r = color[3] * 0.0039215689f; + OGL.triangles.vertices[v].g = color[2] * 0.0039215689f; + OGL.triangles.vertices[v].b = color[1] * 0.0039215689f; + OGL.triangles.vertices[v].a = color[0] * 0.0039215689f; + } + + gSPProcessVertex(v); + vertex++; + } + } + else + { + LOG(LOG_ERROR, "Using Vertex outside buffer v0=%i, n=%i\n", v0, n); + } + +} + +void gSPDMAVertex( u32 v, u32 n, 
u32 v0 ) +{ + + u32 address = gSP.DMAOffsets.vtx + RSP_SegmentToPhysical( v ); + + if ((address + 10 * n) > RDRAMSize) + { + return; + } + + if ((n + v0) <= INDEXMAP_SIZE) + { + u32 i = v0; +#ifdef __VEC4_OPT + for (; i < n - (n%4) + v0; i += 4) + { + u32 v = i; +#ifdef __TRIBUFFER_OPT + v = __indexmap_getnew(v, 4); +#endif + for(int j = 0; j < 4; j++) + { + OGL.triangles.vertices[v+j].x = *(s16*)&RDRAM[address ^ 2]; + OGL.triangles.vertices[v+j].y = *(s16*)&RDRAM[(address + 2) ^ 2]; + OGL.triangles.vertices[v+j].z = *(s16*)&RDRAM[(address + 4) ^ 2]; + + if (gSP.geometryMode & G_LIGHTING) + { + OGL.triangles.vertices[v+j].nx = *(s8*)&RDRAM[(address + 6) ^ 3]; + OGL.triangles.vertices[v+j].ny = *(s8*)&RDRAM[(address + 7) ^ 3]; + OGL.triangles.vertices[v+j].nz = *(s8*)&RDRAM[(address + 8) ^ 3]; + OGL.triangles.vertices[v+j].a = *(u8*)&RDRAM[(address + 9) ^ 3] * 0.0039215689f; + } + else + { + OGL.triangles.vertices[v+j].r = *(u8*)&RDRAM[(address + 6) ^ 3] * 0.0039215689f; + OGL.triangles.vertices[v+j].g = *(u8*)&RDRAM[(address + 7) ^ 3] * 0.0039215689f; + OGL.triangles.vertices[v+j].b = *(u8*)&RDRAM[(address + 8) ^ 3] * 0.0039215689f; + OGL.triangles.vertices[v+j].a = *(u8*)&RDRAM[(address + 9) ^ 3] * 0.0039215689f; + } + address += 10; + } + gSPProcessVertex4(v); + } +#endif + for (; i < n + v0; i++) + { + u32 v = i; +#ifdef __TRIBUFFER_OPT + //int ind = OGL.triangles.indexmap[i]; + v = __indexmap_getnew(v, 1); + + //if previously mapped copy across s/t. 
+ //if (ind != -1) + //{ + // SPVertex *vtx = &OGL.triangles.vertices[ind]; + // OGL.triangles.vertices[v].s = vtx->s; + // OGL.triangles.vertices[v].t = vtx->s; + //} +#else + v = i; +#endif + OGL.triangles.vertices[v].x = *(s16*)&RDRAM[address ^ 2]; + OGL.triangles.vertices[v].y = *(s16*)&RDRAM[(address + 2) ^ 2]; + OGL.triangles.vertices[v].z = *(s16*)&RDRAM[(address + 4) ^ 2]; + + if (gSP.geometryMode & G_LIGHTING) + { + OGL.triangles.vertices[v].nx = *(s8*)&RDRAM[(address + 6) ^ 3]; + OGL.triangles.vertices[v].ny = *(s8*)&RDRAM[(address + 7) ^ 3]; + OGL.triangles.vertices[v].nz = *(s8*)&RDRAM[(address + 8) ^ 3]; + OGL.triangles.vertices[v].a = *(u8*)&RDRAM[(address + 9) ^ 3] * 0.0039215689f; + } + else + { + OGL.triangles.vertices[v].r = *(u8*)&RDRAM[(address + 6) ^ 3] * 0.0039215689f; + OGL.triangles.vertices[v].g = *(u8*)&RDRAM[(address + 7) ^ 3] * 0.0039215689f; + OGL.triangles.vertices[v].b = *(u8*)&RDRAM[(address + 8) ^ 3] * 0.0039215689f; + OGL.triangles.vertices[v].a = *(u8*)&RDRAM[(address + 9) ^ 3] * 0.0039215689f; + } + + gSPProcessVertex(v); + address += 10; + } + } + else + { + LOG(LOG_ERROR, "Using Vertex outside buffer v0=%i, n=%i\n", v0, n); + } + +} + +void gSPDisplayList( u32 dl ) +{ + u32 address = RSP_SegmentToPhysical( dl ); + + if ((address + 8) > RDRAMSize) + { + return; + } + + if (RSP.PCi < (GBI.PCStackSize - 1)) + { +#ifdef DEBUG + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "\n" ); + DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gSPDisplayList( 0x%08X );\n", + dl ); +#endif + RSP.PCi++; + RSP.PC[RSP.PCi] = address; + RSP.nextCmd = _SHIFTR( *(u32*)&RDRAM[address], 24, 8 ); + } + + +} + +void gSPDMADisplayList( u32 dl, u32 n ) +{ + if ((dl + (n << 3)) > RDRAMSize) + { + return; + } + + u32 curDL = RSP.PC[RSP.PCi]; + + RSP.PC[RSP.PCi] = RSP_SegmentToPhysical( dl ); + + while ((RSP.PC[RSP.PCi] - dl) < (n << 3)) + { + if ((RSP.PC[RSP.PCi] + 8) > RDRAMSize) + { + break; + } + + u32 w0 = *(u32*)&RDRAM[RSP.PC[RSP.PCi]]; + u32 w1 = 
*(u32*)&RDRAM[RSP.PC[RSP.PCi] + 4]; + + RSP.PC[RSP.PCi] += 8; + RSP.nextCmd = _SHIFTR( *(u32*)&RDRAM[RSP.PC[RSP.PCi]], 24, 8 ); + + GBI.cmd[_SHIFTR( w0, 24, 8 )]( w0, w1 ); + } + + RSP.PC[RSP.PCi] = curDL; +} + +void gSPBranchList( u32 dl ) +{ + u32 address = RSP_SegmentToPhysical( dl ); + + if ((address + 8) > RDRAMSize) + { + return; + } + + RSP.PC[RSP.PCi] = address; + RSP.nextCmd = _SHIFTR( *(u32*)&RDRAM[address], 24, 8 ); +} + +void gSPBranchLessZ( u32 branchdl, u32 vtx, f32 zval ) +{ + u32 address = RSP_SegmentToPhysical( branchdl ); + + if ((address + 8) > RDRAMSize) + { + return; + } + + if (OGL.triangles.vertices[vtx].z <= zval) + RSP.PC[RSP.PCi] = address; +} + +void gSPSetDMAOffsets( u32 mtxoffset, u32 vtxoffset ) +{ + gSP.DMAOffsets.mtx = mtxoffset; + gSP.DMAOffsets.vtx = vtxoffset; +} + +void gSPSetVertexColorBase( u32 base ) +{ + gSP.vertexColorBase = RSP_SegmentToPhysical( base ); + +#ifdef __TRIBUFFER_OPT + gSPFlushTriangles(); +#endif +} + +void gSPSprite2DBase( u32 base ) +{ +} + +void gSPCopyVertex( SPVertex *dest, SPVertex *src ) +{ + dest->x = src->x; + dest->y = src->y; + dest->z = src->z; + dest->w = src->w; + dest->r = src->r; + dest->g = src->g; + dest->b = src->b; + dest->a = src->a; + dest->s = src->s; + dest->t = src->t; +} + +void gSPInterpolateVertex( SPVertex *dest, f32 percent, SPVertex *first, SPVertex *second ) +{ + dest->x = first->x + percent * (second->x - first->x); + dest->y = first->y + percent * (second->y - first->y); + dest->z = first->z + percent * (second->z - first->z); + dest->w = first->w + percent * (second->w - first->w); + dest->r = first->r + percent * (second->r - first->r); + dest->g = first->g + percent * (second->g - first->g); + dest->b = first->b + percent * (second->b - first->b); + dest->a = first->a + percent * (second->a - first->a); + dest->s = first->s + percent * (second->s - first->s); + dest->t = first->t + percent * (second->t - first->t); +} + +void gSPDMATriangles( u32 tris, u32 n ) +{ + u32 
address = RSP_SegmentToPhysical( tris ); + + if (address + sizeof( DKRTriangle ) * n > RDRAMSize) + { + return; + } + +#ifdef __TRIBUFFER_OPT + __indexmap_undomap(); +#endif + + DKRTriangle *triangles = (DKRTriangle*)&RDRAM[address]; + + for (u32 i = 0; i < n; i++) + { + int mode = 0; + if (!(triangles->flag & 0x40)) + { + if (gSP.viewport.vscale[0] > 0) + mode |= G_CULL_BACK; + else + mode |= G_CULL_FRONT; + } + + if ((gSP.geometryMode&G_CULL_BOTH) != mode) + { + OGL_DrawTriangles(); + gSP.geometryMode &= ~G_CULL_BOTH; + gSP.geometryMode |= mode; + gSP.changed |= CHANGED_GEOMETRYMODE; + } + + + s32 v0 = triangles->v0; + s32 v1 = triangles->v1; + s32 v2 = triangles->v2; + OGL.triangles.vertices[v0].s = _FIXED2FLOAT( triangles->s0, 5 ); + OGL.triangles.vertices[v0].t = _FIXED2FLOAT( triangles->t0, 5 ); + OGL.triangles.vertices[v1].s = _FIXED2FLOAT( triangles->s1, 5 ); + OGL.triangles.vertices[v1].t = _FIXED2FLOAT( triangles->t1, 5 ); + OGL.triangles.vertices[v2].s = _FIXED2FLOAT( triangles->s2, 5 ); + OGL.triangles.vertices[v2].t = _FIXED2FLOAT( triangles->t2, 5 ); + gSPTriangle(triangles->v0, triangles->v1, triangles->v2); + triangles++; + } + +#ifdef __TRIBUFFER_OPT + OGL_DrawTriangles(); +#endif +} + +void gSP1Quadrangle( s32 v0, s32 v1, s32 v2, s32 v3) +{ + gSPTriangle( v0, v1, v2); + gSPTriangle( v0, v2, v3); + gSPFlushTriangles(); +} + +bool gSPCullVertices( u32 v0, u32 vn ) +{ + if (!config.enableClipping) + return FALSE; + + s32 v = v0; +#ifdef __TRIBUFFER_OPT + v = OGL.triangles.indexmap[v0]; +#endif + + u32 clip = OGL.triangles.vertices[v].clip; + if (clip == 0) + return FALSE; + + for (unsigned int i = (v0+1); i <= vn; i++) + { + v = i; +#ifdef __TRIBUFFER_OPT + v = OGL.triangles.indexmap[i]; +#endif + if (OGL.triangles.vertices[v].clip != clip) return FALSE; + } + return TRUE; +} + +void gSPCullDisplayList( u32 v0, u32 vn ) +{ + if (gSPCullVertices( v0, vn )) + { + if (RSP.PCi > 0) + RSP.PCi--; + else + { + RSP.halt = TRUE; + } + } +} + +void 
gSPPopMatrixN( u32 param, u32 num ) +{ + if (gSP.matrix.modelViewi > num - 1) + { + gSP.matrix.modelViewi -= num; + + gSP.changed |= CHANGED_MATRIX; + } +} + +void gSPPopMatrix( u32 param ) +{ + if (gSP.matrix.modelViewi > 0) + { + gSP.matrix.modelViewi--; + + gSP.changed |= CHANGED_MATRIX; + } +} + +void gSPSegment( s32 seg, s32 base ) +{ + if (seg > 0xF) + { + return; + } + + if ((unsigned int)base > RDRAMSize - 1) + { + return; + } + + gSP.segment[seg] = base; +} + +void gSPClipRatio( u32 r ) +{ +} + +void gSPInsertMatrix( u32 where, u32 num ) +{ + f32 fraction, integer; + + if (gSP.changed & CHANGED_MATRIX) + gSPCombineMatrices(); + + if ((where & 0x3) || (where > 0x3C)) + { + return; + } + + if (where < 0x20) + { + fraction = modff( gSP.matrix.combined[0][where >> 1], &integer ); + gSP.matrix.combined[0][where >> 1] = (s16)_SHIFTR( num, 16, 16 ) + abs( (int)fraction ); + + fraction = modff( gSP.matrix.combined[0][(where >> 1) + 1], &integer ); + gSP.matrix.combined[0][(where >> 1) + 1] = (s16)_SHIFTR( num, 0, 16 ) + abs( (int)fraction ); + } + else + { + f32 newValue; + + fraction = modff( gSP.matrix.combined[0][(where - 0x20) >> 1], &integer ); + newValue = integer + _FIXED2FLOAT( _SHIFTR( num, 16, 16 ), 16); + + // Make sure the sign isn't lost + if ((integer == 0.0f) && (fraction != 0.0f)) + newValue = newValue * (fraction / abs( (int)fraction )); + + gSP.matrix.combined[0][(where - 0x20) >> 1] = newValue; + + fraction = modff( gSP.matrix.combined[0][((where - 0x20) >> 1) + 1], &integer ); + newValue = integer + _FIXED2FLOAT( _SHIFTR( num, 0, 16 ), 16 ); + + // Make sure the sign isn't lost + if ((integer == 0.0f) && (fraction != 0.0f)) + newValue = newValue * (fraction / abs( (int)fraction )); + + gSP.matrix.combined[0][((where - 0x20) >> 1) + 1] = newValue; + } +} + +void gSPModifyVertex( u32 vtx, u32 where, u32 val ) +{ + s32 v = vtx; + +#ifdef __TRIBUFFER_OPT + v = OGL.triangles.indexmap[v]; +#endif + + switch (where) + { + case G_MWO_POINT_RGBA: + 
OGL.triangles.vertices[v].r = _SHIFTR( val, 24, 8 ) * 0.0039215689f; + OGL.triangles.vertices[v].g = _SHIFTR( val, 16, 8 ) * 0.0039215689f; + OGL.triangles.vertices[v].b = _SHIFTR( val, 8, 8 ) * 0.0039215689f; + OGL.triangles.vertices[v].a = _SHIFTR( val, 0, 8 ) * 0.0039215689f; + break; + case G_MWO_POINT_ST: + OGL.triangles.vertices[v].s = _FIXED2FLOAT( (s16)_SHIFTR( val, 16, 16 ), 5 ); + OGL.triangles.vertices[v].t = _FIXED2FLOAT( (s16)_SHIFTR( val, 0, 16 ), 5 ); + break; + case G_MWO_POINT_XYSCREEN: + break; + case G_MWO_POINT_ZSCREEN: + break; + } +} + +void gSPNumLights( s32 n ) +{ + gSP.numLights = (n <= 8) ? n : 0; +} + + +void gSPLightColor( u32 lightNum, u32 packedColor ) +{ + lightNum--; + + if (lightNum < 8) + { + gSP.lights[lightNum].r = _SHIFTR( packedColor, 24, 8 ) * 0.0039215689f; + gSP.lights[lightNum].g = _SHIFTR( packedColor, 16, 8 ) * 0.0039215689f; + gSP.lights[lightNum].b = _SHIFTR( packedColor, 8, 8 ) * 0.0039215689f; + } +} + +void gSPFogFactor( s16 fm, s16 fo ) +{ + gSP.fog.multiplier = fm; + gSP.fog.offset = fo; + + gSP.changed |= CHANGED_FOGPOSITION; +} + +void gSPPerspNormalize( u16 scale ) +{ +} + +void gSPTexture( f32 sc, f32 tc, s32 level, s32 tile, s32 on ) +{ + gSP.texture.scales = sc; + gSP.texture.scalet = tc; + + if (gSP.texture.scales == 0.0f) gSP.texture.scales = 1.0f; + if (gSP.texture.scalet == 0.0f) gSP.texture.scalet = 1.0f; + + gSP.texture.level = level; + gSP.texture.on = on; + + if (gSP.texture.tile != tile) + { + gSP.texture.tile = tile; + gSP.textureTile[0] = &gDP.tiles[tile]; + gSP.textureTile[1] = &gDP.tiles[(tile < 7) ? 
/*
 * Draw a scaled background image described by the structure at segmented
 * address `bg`.
 *
 * The active (#if 1) path decodes the structure by hand from RDRAM as an
 * array of 16-bit halfwords; the disabled (#else) path reads the same
 * fields through a uObjScaleBg pointer. Both paths fill gSP.bgImage, switch
 * gDP.textureMode to TEXTUREMODE_BGIMAGE and produce the image/frame/scale
 * values used by the drawing code after the #endif.
 */
void gSPBgRect1Cyc( u32 bg )
{

#if 1

    // Halfword index of the structure: physical byte address divided by two.
    u32 addr = RSP_SegmentToPhysical(bg) >> 1;

    // Every halfword index below is XORed with 1, which selects the other
    // halfword of the aligned pair.
    // NOTE(review): presumably this compensates for the word-swapped layout
    // of RDRAM on the host - confirm.
    // The trailing numbers are the halfword offsets inside the structure.
    // The right shifts keep only the integer part of the fixed-point fields
    // (the #else path decodes imageX/imageY with 5 fractional bits).
    f32 imageX = (((u16*)RDRAM)[(addr+0)^1] >> 5); // 0
    f32 imageY = (((u16*)RDRAM)[(addr+4)^1] >> 5); // 4
    f32 imageW = (((u16*)RDRAM)[(addr+1)^1] >> 2); // 1
    f32 imageH = (((u16*)RDRAM)[(addr+5)^1] >> 2); // 5

    // Frame position is signed and keeps its quarter-unit fraction; frame
    // size is unsigned and truncated to whole units.
    f32 frameX = ((s16*)RDRAM)[(addr+2)^1] / 4.0f; // 2
    f32 frameY = ((s16*)RDRAM)[(addr+6)^1] / 4.0f; // 6
    f32 frameW = ((u16*)RDRAM)[(addr+3)^1] >> 2; // 3
    f32 frameH = ((u16*)RDRAM)[(addr+7)^1] >> 2; // 7


    // Image flip (halfword 13) is not implemented; kept for reference.
    //wxUint16 imageFlip = ((u16*)gfx.RDRAM)[(addr+13)^1]; // 13;
    //d.flipX = (u8)imageFlip&0x01;

    // Halfwords 8 and 9 are read together as one 32-bit segmented pointer.
    gSP.bgImage.address = RSP_SegmentToPhysical(((u32*)RDRAM)[(addr+8)>>1]); // 8,9
    gSP.bgImage.width = imageW;
    gSP.bgImage.height = imageH;
    // Format and size are the two bytes of halfword 11 (byte index XOR 3).
    gSP.bgImage.format = ((u8*)RDRAM)[(((addr+11)<<1)+0)^3];
    gSP.bgImage.size = ((u8*)RDRAM)[(((addr+11)<<1)+1)^3];
    gSP.bgImage.palette = ((u16*)RDRAM)[(addr+12)^1];

    // Scale factors are divided by 1024, i.e. 10 fractional bits, matching
    // _FIXED2FLOAT( ..., 10 ) in the #else path.
    f32 scaleW = ((s16*)RDRAM)[(addr+14)^1] / 1024.0f; // 14
    f32 scaleH = ((s16*)RDRAM)[(addr+15)^1] / 1024.0f; // 15
    gDP.textureMode = TEXTUREMODE_BGIMAGE;

#else
    // Disabled alternative: same decode through the uObjScaleBg structure.
    u32 address = RSP_SegmentToPhysical( bg );
    uObjScaleBg *objScaleBg = (uObjScaleBg*)&RDRAM[address];

    gSP.bgImage.address = RSP_SegmentToPhysical( objScaleBg->imagePtr );
    gSP.bgImage.width = objScaleBg->imageW >> 2;
    gSP.bgImage.height = objScaleBg->imageH >> 2;
    gSP.bgImage.format = objScaleBg->imageFmt;
    gSP.bgImage.size = objScaleBg->imageSiz;
    gSP.bgImage.palette = objScaleBg->imagePal;
    gDP.textureMode = TEXTUREMODE_BGIMAGE;

    f32 imageX = _FIXED2FLOAT( objScaleBg->imageX, 5 );
    f32 imageY = _FIXED2FLOAT( objScaleBg->imageY, 5 );
    f32 imageW = objScaleBg->imageW >> 2;
    f32 imageH = objScaleBg->imageH >> 2;

    f32 frameX = _FIXED2FLOAT( objScaleBg->frameX, 2 );
    f32 frameY = _FIXED2FLOAT( objScaleBg->frameY, 2 );
    f32 frameW = _FIXED2FLOAT( objScaleBg->frameW, 2 );
    f32 frameH = _FIXED2FLOAT( objScaleBg->frameH, 2 );
    f32 scaleW = _FIXED2FLOAT( objScaleBg->scaleW, 10 );
    f32 scaleH = _FIXED2FLOAT( objScaleBg->scaleH, 10 );
#endif

    // Top-left corner of the frame and the texel it starts from.
    f32 frameX0 = frameX;
    f32 frameY0 = frameY;
    f32 frameS0 = imageX;
    f32 frameT0 = imageY;

    // Bottom-right corner of the first rectangle: limited either by the
    // image running out (remaining texels divided by the scale) or by the
    // frame size, whichever is smaller.
    // NOTE(review): scaleW / scaleH are used as divisors without a zero check.
    f32 frameX1 = frameX + min( (imageW - imageX) / scaleW, frameW );
    f32 frameY1 = frameY + min( (imageH - imageY) / scaleH, frameH );
    //f32 frameS1 = imageX + min( (imageW - imageX) * scaleW, frameW * scaleW );
    //f32 frameT1 = imageY + min( (imageH - imageY) * scaleH, frameH * scaleH );

    // Force 1-cycle mode and enable texturing on tile 0 before drawing.
    gDP.otherMode.cycleType = G_CYC_1CYCLE;
    gDP.changed |= CHANGED_CYCLETYPE;
    gSPTexture( 1.0f, 1.0f, 0, 0, TRUE );
    gDPTextureRectangle( frameX0, frameY0, frameX1 - 1, frameY1 - 1, 0, frameS0 - 1, frameT0 - 1, scaleW, scaleH );

    // The image ended before the right edge of the frame: draw the remaining
    // strip to the right, starting again from S = 0.
    if ((frameX1 - frameX0) < frameW)
    {
        f32 frameX2 = frameW - (frameX1 - frameX0) + frameX1;
        gDPTextureRectangle( frameX1, frameY0, frameX2 - 1, frameY1 - 1, 0, 0, frameT0, scaleW, scaleH );
    }

    // The image ended before the bottom edge of the frame: draw the remaining
    // strip below, starting again from T = 0.
    if ((frameY1 - frameY0) < frameH)
    {
        f32 frameY2 = frameH - (frameY1 - frameY0) + frameY1;
        gDPTextureRectangle( frameX0, frameY1, frameX1 - 1, frameY2 - 1, 0, frameS0, 0, scaleW, scaleH );
    }

    // NOTE(review): unconditional rectangle with hard-coded corners (0,0) to
    // (319,239), drawn after the frame rectangles regardless of the frame
    // values decoded above - looks like a workaround; confirm it is intended.
    gDPTextureRectangle( 0, 0, 319, 239, 0, 0, 0, scaleW, scaleH );
}
4.0f : 1.0f), scaleH ); +} + +void gSPObjLoadTxtr( u32 tx ) +{ + u32 address = RSP_SegmentToPhysical( tx ); + uObjTxtr *objTxtr = (uObjTxtr*)&RDRAM[address]; + + if ((gSP.status[objTxtr->block.sid >> 2] & objTxtr->block.mask) != objTxtr->block.flag) + { + switch (objTxtr->block.type) + { + case G_OBJLT_TXTRBLOCK: + gDPSetTextureImage( 0, 1, 0, objTxtr->block.image ); + gDPSetTile( 0, 1, 0, objTxtr->block.tmem, 7, 0, 0, 0, 0, 0, 0, 0 ); + gDPLoadBlock( 7, 0, 0, ((objTxtr->block.tsize + 1) << 3) - 1, objTxtr->block.tline ); + break; + case G_OBJLT_TXTRTILE: + gDPSetTextureImage( 0, 1, (objTxtr->tile.twidth + 1) << 1, objTxtr->tile.image ); + gDPSetTile( 0, 1, (objTxtr->tile.twidth + 1) >> 2, objTxtr->tile.tmem, 7, 0, 0, 0, 0, 0, 0, 0 ); + gDPLoadTile( 7, 0, 0, (((objTxtr->tile.twidth + 1) << 1) - 1) << 2, (((objTxtr->tile.theight + 1) >> 2) - 1) << 2 ); + break; + case G_OBJLT_TLUT: + gDPSetTextureImage( 0, 2, 1, objTxtr->tlut.image ); + gDPSetTile( 0, 2, 0, objTxtr->tlut.phead, 7, 0, 0, 0, 0, 0, 0, 0 ); + gDPLoadTLUT( 7, 0, 0, objTxtr->tlut.pnum << 2, 0 ); + break; + } + gSP.status[objTxtr->block.sid >> 2] = (gSP.status[objTxtr->block.sid >> 2] & ~objTxtr->block.mask) | (objTxtr->block.flag & objTxtr->block.mask); + } +} + +void gSPObjSprite( u32 sp ) +{ + u32 address = RSP_SegmentToPhysical( sp ); + uObjSprite *objSprite = (uObjSprite*)&RDRAM[address]; + + f32 scaleW = _FIXED2FLOAT( objSprite->scaleW, 10 ); + f32 scaleH = _FIXED2FLOAT( objSprite->scaleH, 10 ); + f32 objX = _FIXED2FLOAT( objSprite->objX, 2 ); + f32 objY = _FIXED2FLOAT( objSprite->objY, 2 ); + u32 imageW = objSprite->imageW >> 5; + u32 imageH = objSprite->imageH >> 5; + + f32 x0 = objX; + f32 y0 = objY; + f32 x1 = objX + imageW / scaleW - 1; + f32 y1 = objY + imageH / scaleH - 1; + + s32 v0=0,v1=1,v2=2,v3=3; + +#ifdef __TRIBUFFER_OPT + v0 = OGL.triangles.indexmap[v0]; + v1 = OGL.triangles.indexmap[v1]; + v2 = OGL.triangles.indexmap[v2]; + v3 = OGL.triangles.indexmap[v3]; +#endif + + 
OGL.triangles.vertices[v0].x = gSP.objMatrix.A * x0 + gSP.objMatrix.B * y0 + gSP.objMatrix.X; + OGL.triangles.vertices[v0].y = gSP.objMatrix.C * x0 + gSP.objMatrix.D * y0 + gSP.objMatrix.Y; + OGL.triangles.vertices[v0].z = 0.0f; + OGL.triangles.vertices[v0].w = 1.0f; + OGL.triangles.vertices[v0].s = 0.0f; + OGL.triangles.vertices[v0].t = 0.0f; + OGL.triangles.vertices[v1].x = gSP.objMatrix.A * x1 + gSP.objMatrix.B * y0 + gSP.objMatrix.X; + OGL.triangles.vertices[v1].y = gSP.objMatrix.C * x1 + gSP.objMatrix.D * y0 + gSP.objMatrix.Y; + OGL.triangles.vertices[v1].z = 0.0f; + OGL.triangles.vertices[v1].w = 1.0f; + OGL.triangles.vertices[v1].s = imageW - 1; + OGL.triangles.vertices[v1].t = 0.0f; + OGL.triangles.vertices[v2].x = gSP.objMatrix.A * x1 + gSP.objMatrix.B * y1 + gSP.objMatrix.X; + OGL.triangles.vertices[v2].y = gSP.objMatrix.C * x1 + gSP.objMatrix.D * y1 + gSP.objMatrix.Y; + OGL.triangles.vertices[v2].z = 0.0f; + OGL.triangles.vertices[v2].w = 1.0f; + OGL.triangles.vertices[v2].s = imageW - 1; + OGL.triangles.vertices[v2].t = imageH - 1; + OGL.triangles.vertices[v3].x = gSP.objMatrix.A * x0 + gSP.objMatrix.B * y1 + gSP.objMatrix.X; + OGL.triangles.vertices[v3].y = gSP.objMatrix.C * x0 + gSP.objMatrix.D * y1 + gSP.objMatrix.Y; + OGL.triangles.vertices[v3].z = 0.0f; + OGL.triangles.vertices[v3].w = 1.0f; + OGL.triangles.vertices[v3].s = 0; + OGL.triangles.vertices[v3].t = imageH - 1; + + gDPSetTile( objSprite->imageFmt, objSprite->imageSiz, objSprite->imageStride, objSprite->imageAdrs, 0, objSprite->imagePal, G_TX_CLAMP, G_TX_CLAMP, 0, 0, 0, 0 ); + gDPSetTileSize( 0, 0, 0, (imageW - 1) << 2, (imageH - 1) << 2 ); + gSPTexture( 1.0f, 1.0f, 0, 0, TRUE ); + + //glOrtho( 0, VI.width, VI.height, 0, 0.0f, 32767.0f ); + OGL.triangles.vertices[v0].x = 2.0f * VI.rwidth * OGL.triangles.vertices[v0].x - 1.0f; + OGL.triangles.vertices[v0].y = -2.0f * VI.rheight * OGL.triangles.vertices[v0].y + 1.0f; + OGL.triangles.vertices[v0].z = -1.0f; + OGL.triangles.vertices[v0].w = 
1.0f; + OGL.triangles.vertices[v1].x = 2.0f * VI.rwidth * OGL.triangles.vertices[v0].x - 1.0f; + OGL.triangles.vertices[v1].y = -2.0f * VI.rheight * OGL.triangles.vertices[v0].y + 1.0f; + OGL.triangles.vertices[v1].z = -1.0f; + OGL.triangles.vertices[v1].w = 1.0f; + OGL.triangles.vertices[v2].x = 2.0f * VI.rwidth * OGL.triangles.vertices[v0].x - 1.0f; + OGL.triangles.vertices[v2].y = -2.0f * VI.rheight * OGL.triangles.vertices[v0].y + 1.0f; + OGL.triangles.vertices[v2].z = -1.0f; + OGL.triangles.vertices[v2].w = 1.0f; + OGL.triangles.vertices[v3].x = 2.0f * VI.rwidth * OGL.triangles.vertices[v0].x - 1.0f; + OGL.triangles.vertices[v3].y = -2.0f * VI.rheight * OGL.triangles.vertices[v0].y + 1.0f; + OGL.triangles.vertices[v3].z = -1.0f; + OGL.triangles.vertices[v3].w = 1.0f; + + OGL_AddTriangle(v0, v1, v2); + OGL_AddTriangle(v0, v2, v3); + OGL_DrawTriangles(); + + if (depthBuffer.current) depthBuffer.current->cleared = FALSE; + gDP.colorImage.changed = TRUE; + gDP.colorImage.height = (unsigned int)(max( gDP.colorImage.height, gDP.scissor.lry )); +} + +void gSPObjLoadTxSprite( u32 txsp ) +{ + gSPObjLoadTxtr( txsp ); + gSPObjSprite( txsp + sizeof( uObjTxtr ) ); +} + +void gSPObjLoadTxRectR( u32 txsp ) +{ + gSPObjLoadTxtr( txsp ); +// gSPObjRectangleR( txsp + sizeof( uObjTxtr ) ); +} + +void gSPObjMatrix( u32 mtx ) +{ + u32 address = RSP_SegmentToPhysical( mtx ); + uObjMtx *objMtx = (uObjMtx*)&RDRAM[address]; + + gSP.objMatrix.A = _FIXED2FLOAT( objMtx->A, 16 ); + gSP.objMatrix.B = _FIXED2FLOAT( objMtx->B, 16 ); + gSP.objMatrix.C = _FIXED2FLOAT( objMtx->C, 16 ); + gSP.objMatrix.D = _FIXED2FLOAT( objMtx->D, 16 ); + gSP.objMatrix.X = _FIXED2FLOAT( objMtx->X, 2 ); + gSP.objMatrix.Y = _FIXED2FLOAT( objMtx->Y, 2 ); + gSP.objMatrix.baseScaleX = _FIXED2FLOAT( objMtx->BaseScaleX, 10 ); + gSP.objMatrix.baseScaleY = _FIXED2FLOAT( objMtx->BaseScaleY, 10 ); +} + +void gSPObjSubMatrix( u32 mtx ) +{ +} + + +#ifdef __VEC4_OPT +void (*gSPTransformVertex4)(u32 v, float mtx[4][4]) = + 
/*
 * Draw the buffered triangles when the next RSP command would not simply
 * add more of them.
 *
 * With config.tribufferOpt enabled, the buffer is kept across the listed
 * no-op, move-memory, display-list, vertex and matrix commands and is
 * flushed when it grows past 1000 entries or any other command follows.
 * With it disabled, the buffer is flushed as soon as the next command is
 * not a triangle/quad command.
 *
 * The two cases are now explicit branches. Written as a single expression
 * "A && B || C || D", the && bound tighter than the ||, so the tribuffer
 * exemptions never delayed a flush.
 *
 * Expands to a single statement; use as `gSPFlushTriangles();`.
 */
#define gSPFlushTriangles() \
do \
{ \
    if (config.tribufferOpt) \
    { \
        if \
        ( \
            (OGL.triangles.num > 1000) || \
            ( \
                (RSP.nextCmd != G_NOOP) && \
                (RSP.nextCmd != G_RDPNOOP) && \
                (RSP.nextCmd != G_MOVEMEM) && \
                (RSP.nextCmd != G_ENDDL) && \
                (RSP.nextCmd != G_DL) && \
                (RSP.nextCmd != G_VTXCOLORBASE) && \
                (RSP.nextCmd != G_TRI1) && \
                (RSP.nextCmd != G_TRI2) && \
                (RSP.nextCmd != G_TRI4) && \
                (RSP.nextCmd != G_QUAD) && \
                (RSP.nextCmd != G_VTX) && \
                (RSP.nextCmd != G_MTX) \
            ) \
        ) \
        { \
            OGL_DrawTriangles(); \
        } \
    } \
    else if \
    ( \
        (RSP.nextCmd != G_TRI1) && \
        (RSP.nextCmd != G_TRI2) && \
        (RSP.nextCmd != G_TRI4) && \
        (RSP.nextCmd != G_QUAD) \
    ) \
    { \
        OGL_DrawTriangles(); \
    } \
} while (0)
+void gSPLight( u32 l, s32 n ); +void gSPLookAt( u32 l ); +void gSPVertex( u32 v, u32 n, u32 v0 ); +void gSPCIVertex( u32 v, u32 n, u32 v0 ); +void gSPDMAVertex( u32 v, u32 n, u32 v0 ); +void gSPDisplayList( u32 dl ); +void gSPDMADisplayList( u32 dl, u32 n ); +void gSPBranchList( u32 dl ); +void gSPBranchLessZ( u32 branchdl, u32 vtx, f32 zval ); +void gSPSprite2DBase( u32 base ); +void gSPDMATriangles( u32 tris, u32 n ); +void gSP1Quadrangle( s32 v0, s32 v1, s32 v2, s32 v3 ); +void gSPCullDisplayList( u32 v0, u32 vn ); +void gSPPopMatrix( u32 param ); +void gSPPopMatrixN( u32 param, u32 num ); +void gSPSegment( s32 seg, s32 base ); +void gSPClipRatio( u32 r ); +void gSPInsertMatrix( u32 where, u32 num ); +void gSPModifyVertex( u32 vtx, u32 where, u32 val ); +void gSPNumLights( s32 n ); +void gSPLightColor( u32 lightNum, u32 packedColor ); +void gSPFogFactor( s16 fm, s16 fo ); +void gSPPerspNormalize( u16 scale ); +void gSPTexture( f32 sc, f32 tc, s32 level, s32 tile, s32 on ); +void gSPEndDisplayList(); +void gSPGeometryMode( u32 clear, u32 set ); +void gSPSetGeometryMode( u32 mode ); +void gSPClearGeometryMode( u32 mode ); +void gSPLine3D( s32 v0, s32 v1, s32 flag ); +void gSPLineW3D( s32 v0, s32 v1, s32 wd, s32 flag ); +void gSPObjRectangle( u32 sp ); +void gSPObjSprite( u32 sp ); +void gSPObjLoadTxtr( u32 tx ); +void gSPObjLoadTxSprite( u32 txsp ); +void gSPObjLoadTxRectR( u32 txsp ); +void gSPBgRect1Cyc( u32 bg ); +void gSPBgRectCopy( u32 bg ); +void gSPObjMatrix( u32 mtx ); +void gSPObjSubMatrix( u32 mtx ); +void gSPSetDMAOffsets( u32 mtxoffset, u32 vtxoffset ); +void gSPSetVertexColorBase( u32 base ); +void gSPProcessVertex(u32 v); + +void gSPTriangleUnknown(); + +void gSP1Triangle(s32 v0, s32 v1, s32 v2); +void gSP2Triangles(const s32 v00, const s32 v01, const s32 v02, const s32 flag0, + const s32 v10, const s32 v11, const s32 v12, const s32 flag1 ); +void gSP4Triangles(const s32 v00, const s32 v01, const s32 v02, + const s32 v10, const s32 v11, const s32 
// Transforms four consecutive vertices, starting at OGL.triangles.vertices[v],
// by the 4x4 matrix `mtx` using NEON. For each vertex the value written back
// over its x,y,z,w is
//     mtx[3] + x * mtx[0] + y * mtx[1] + z * mtx[2]
// The vertex's own w lane is never read: the accumulators are seeded with
// row 3 of the matrix instead of multiplying it by w.
// Vertices are stepped with a stride of sizeof(SPVertex) bytes.
static void gSPTransformVertex4NEON(u32 v, float mtx[4][4])
{
    float *ptr = &OGL.triangles.vertices[v].x;

#if 0
    // Disabled variant: interleaves vertex and matrix loads, uses 128-bit
    // alignment qualifiers on every access and two scratch registers for the
    // store addresses.
    // Operands: %0 = mtx, %1 = ptr, %2 = tmp0, %3 = tmp1,
    //           %4 = sizeof(SPVertex), %5 = 2 * sizeof, %6 = 3 * sizeof.
    // tmp0/tmp1 are declared read-write but are only written by the asm
    // before being used as addresses.
    volatile int tmp0, tmp1;
    asm volatile (
    "vld1.32 {d0, d1}, [%1, :128] \n\t" //q0 = vertex 0 {x,y,z,w}
    "add %1, %1, %4 \n\t" //ptr -> vertex 1
    "vld1.32 {d18, d19}, [%0, :128]! \n\t" //q9 = mtx[0], mtx pointer += 16
    "vld1.32 {d2, d3}, [%1, :128] \n\t" //q1 = vertex 1 {x,y,z,w}
    "add %1, %1, %4 \n\t" //ptr -> vertex 2
    "vld1.32 {d20, d21}, [%0, :128]! \n\t" //q10 = mtx[1], mtx pointer += 16
    "vld1.32 {d4, d5}, [%1, :128] \n\t" //q2 = vertex 2 {x,y,z,w}
    "add %1, %1, %4 \n\t" //ptr -> vertex 3
    "vld1.32 {d22, d23}, [%0, :128]! \n\t" //q11 = mtx[2], mtx pointer += 16
    "vld1.32 {d6, d7}, [%1, :128] \n\t" //q3 = vertex 3 {x,y,z,w}
    "vld1.32 {d24, d25}, [%0, :128] \n\t" //q12 = mtx[3]
    "sub %1, %1, %6 \n\t" //ptr back to vertex 0

    "vmov.f32 q13, q12 \n\t" //q13 = mtx[3] (accumulator, vertex 1)
    "vmov.f32 q14, q12 \n\t" //q14 = mtx[3] (accumulator, vertex 2)
    "vmov.f32 q15, q12 \n\t" //q15 = mtx[3] (accumulator, vertex 3)

    "vmla.f32 q12, q9, d0[0] \n\t" //q12 += mtx[0] * vertex0.x
    "vmla.f32 q13, q9, d2[0] \n\t" //q13 += mtx[0] * vertex1.x
    "vmla.f32 q14, q9, d4[0] \n\t" //q14 += mtx[0] * vertex2.x
    "vmla.f32 q15, q9, d6[0] \n\t" //q15 += mtx[0] * vertex3.x
    "vmla.f32 q12, q10, d0[1] \n\t" //q12 += mtx[1] * vertex0.y
    "vmla.f32 q13, q10, d2[1] \n\t" //q13 += mtx[1] * vertex1.y
    "vmla.f32 q14, q10, d4[1] \n\t" //q14 += mtx[1] * vertex2.y
    "vmla.f32 q15, q10, d6[1] \n\t" //q15 += mtx[1] * vertex3.y
    "vmla.f32 q12, q11, d1[0] \n\t" //q12 += mtx[2] * vertex0.z
    "vmla.f32 q13, q11, d3[0] \n\t" //q13 += mtx[2] * vertex1.z
    "vmla.f32 q14, q11, d5[0] \n\t" //q14 += mtx[2] * vertex2.z
    "vmla.f32 q15, q11, d7[0] \n\t" //q15 += mtx[2] * vertex3.z

    "add %0, %1, %4 \n\t" //%0 = address of vertex 1 (mtx register reused)
    "add %2, %1, %5 \n\t" //%2 = address of vertex 2
    "add %3, %1, %6 \n\t" //%3 = address of vertex 3
    "vst1.32 {d24, d25}, [%1, :128] \n\t" //vertex 0 = q12
    "vst1.32 {d26, d27}, [%0, :128] \n\t" //vertex 1 = q13
    "vst1.32 {d28, d29}, [%2, :128] \n\t" //vertex 2 = q14
    "vst1.32 {d30, d31}, [%3, :128] \n\t" //vertex 3 = q15
    : "+&r"(mtx), "+&r"(ptr), "+r"(tmp0), "+r"(tmp1)
    : "I"(sizeof(SPVertex)),"I"(2 * sizeof(SPVertex)), "I"(3 * sizeof(SPVertex))
    : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
      "d18","d19", "d20", "d21", "d22", "d23", "d24",
      "d25", "d26", "d27", "d28", "d29", "d30", "d31", "memory"
    );
#else
    // Active variant: no alignment qualifiers; ptr itself is walked forward
    // for the loads, rewound, then walked forward again for the stores.
    // Operands: %0 = mtx, %1 = ptr,
    //           %2 = sizeof(SPVertex), %3 = 3 * sizeof(SPVertex).
    asm volatile (
    "vld1.32 {d0, d1}, [%1] \n\t" //q0 = vertex 0 {x,y,z,w}
    "add %1, %1, %2 \n\t" //ptr -> vertex 1
    "vld1.32 {d2, d3}, [%1] \n\t" //q1 = vertex 1 {x,y,z,w}
    "add %1, %1, %2 \n\t" //ptr -> vertex 2
    "vld1.32 {d4, d5}, [%1] \n\t" //q2 = vertex 2 {x,y,z,w}
    "add %1, %1, %2 \n\t" //ptr -> vertex 3
    "vld1.32 {d6, d7}, [%1] \n\t" //q3 = vertex 3 {x,y,z,w}
    "sub %1, %1, %3 \n\t" //ptr back to vertex 0

    "vld1.32 {d18, d19}, [%0]! \n\t" //q9 = mtx[0], mtx pointer += 16
    "vld1.32 {d20, d21}, [%0]! \n\t" //q10 = mtx[1], mtx pointer += 16
    "vld1.32 {d22, d23}, [%0]! \n\t" //q11 = mtx[2], mtx pointer += 16
    "vld1.32 {d24, d25}, [%0] \n\t" //q12 = mtx[3]

    "vmov.f32 q13, q12 \n\t" //q13 = mtx[3] (accumulator, vertex 1)
    "vmov.f32 q14, q12 \n\t" //q14 = mtx[3] (accumulator, vertex 2)
    "vmov.f32 q15, q12 \n\t" //q15 = mtx[3] (accumulator, vertex 3)

    "vmla.f32 q12, q9, d0[0] \n\t" //q12 += mtx[0] * vertex0.x
    "vmla.f32 q13, q9, d2[0] \n\t" //q13 += mtx[0] * vertex1.x
    "vmla.f32 q14, q9, d4[0] \n\t" //q14 += mtx[0] * vertex2.x
    "vmla.f32 q15, q9, d6[0] \n\t" //q15 += mtx[0] * vertex3.x
    "vmla.f32 q12, q10, d0[1] \n\t" //q12 += mtx[1] * vertex0.y
    "vmla.f32 q13, q10, d2[1] \n\t" //q13 += mtx[1] * vertex1.y
    "vmla.f32 q14, q10, d4[1] \n\t" //q14 += mtx[1] * vertex2.y
    "vmla.f32 q15, q10, d6[1] \n\t" //q15 += mtx[1] * vertex3.y
    "vmla.f32 q12, q11, d1[0] \n\t" //q12 += mtx[2] * vertex0.z
    "vmla.f32 q13, q11, d3[0] \n\t" //q13 += mtx[2] * vertex1.z
    "vmla.f32 q14, q11, d5[0] \n\t" //q14 += mtx[2] * vertex2.z
    "vmla.f32 q15, q11, d7[0] \n\t" //q15 += mtx[2] * vertex3.z

    "vst1.32 {d24, d25}, [%1] \n\t" //vertex 0 = q12
    "add %1, %1, %2 \n\t" //ptr -> vertex 1
    "vst1.32 {d26, d27}, [%1] \n\t" //vertex 1 = q13
    "add %1, %1, %2 \n\t" //ptr -> vertex 2
    "vst1.32 {d28, d29}, [%1] \n\t" //vertex 2 = q14
    "add %1, %1, %2 \n\t" //ptr -> vertex 3
    "vst1.32 {d30, d31}, [%1] \n\t" //vertex 3 = q15

    : "+&r"(mtx), "+&r"(ptr)
    : "I"(sizeof(SPVertex)), "I"(3 * sizeof(SPVertex))
    : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
      "d18","d19", "d20", "d21", "d22", "d23", "d24",
      "d25", "d26", "d27", "d28", "d29", "d30", "d31", "memory"
    );
#endif
}
\n\t" //q10 = m+16 + "vld1.32 {d22, d23}, [%0] \n\t" //q11 = m+32 + + "vmul.f32 q12, q9, d0[0] \n\t" //q12 = q9*d0[0] + "vmul.f32 q13, q9, d2[0] \n\t" //q13 = q9*d2[0] + "vmul.f32 q14, q9, d4[0] \n\t" //q14 = q9*d4[0] + "vmul.f32 q15, q9, d6[0] \n\t" //q15 = q9*d6[0] + + "vmla.f32 q12, q10, d0[1] \n\t" //q12 += q10*q0[1] + "vmla.f32 q13, q10, d2[1] \n\t" //q13 += q10*q2[1] + "vmla.f32 q14, q10, d4[1] \n\t" //q14 += q10*q4[1] + "vmla.f32 q15, q10, d6[1] \n\t" //q15 += q10*q6[1] + + "vmla.f32 q12, q11, d1[0] \n\t" //q12 += q11*d1[0] + "vmla.f32 q13, q11, d3[0] \n\t" //q13 += q11*d3[0] + "vmla.f32 q14, q11, d5[0] \n\t" //q14 += q11*d5[0] + "vmla.f32 q15, q11, d7[0] \n\t" //q15 += q11*d7[0] + + "vmul.f32 q0, q12, q12 \n\t" //q0 = q12*q12 + "vmul.f32 q1, q13, q13 \n\t" //q1 = q13*q13 + "vmul.f32 q2, q14, q14 \n\t" //q2 = q14*q14 + "vmul.f32 q3, q15, q15 \n\t" //q3 = q15*q15 + + "vpadd.f32 d0, d0 \n\t" //d0[0] = d0[0] + d0[1] + "vpadd.f32 d2, d2 \n\t" //d2[0] = d2[0] + d2[1] + "vpadd.f32 d4, d4 \n\t" //d4[0] = d4[0] + d4[1] + "vpadd.f32 d6, d6 \n\t" //d6[0] = d6[0] + d6[1] + + "vmov.f32 s1, s2 \n\t" //d0[1] = d1[0] + "vmov.f32 s5, s6 \n\t" //d2[1] = d3[0] + "vmov.f32 s9, s10 \n\t" //d4[1] = d5[0] + "vmov.f32 s13, s14 \n\t" //d6[1] = d7[0] + + "vpadd.f32 d0, d0, d2 \n\t" //d0 = {d0[0] + d0[1], d2[0] + d2[1]} + "vpadd.f32 d1, d4, d6 \n\t" //d1 = {d4[0] + d4[1], d6[0] + d6[1]} + + "vmov.f32 q1, q0 \n\t" //q1 = q0 + "vrsqrte.f32 q0, q0 \n\t" //q0 = ~ 1.0 / sqrt(q0) + "vmul.f32 q2, q0, q1 \n\t" //q2 = q0 * q1 + "vrsqrts.f32 q3, q2, q0 \n\t" //q3 = (3 - q0 * q2) / 2 + "vmul.f32 q0, q0, q3 \n\t" //q0 = q0 * q3 + "vmul.f32 q2, q0, q1 \n\t" //q2 = q0 * q1 + "vrsqrts.f32 q3, q2, q0 \n\t" //q3 = (3 - q0 * q2) / 2 + "vmul.f32 q0, q0, q3 \n\t" //q0 = q0 * q3 + + "vmul.f32 q3, q15, d1[1] \n\t" //q3 = q15*d1[1] + "vmul.f32 q2, q14, d1[0] \n\t" //q2 = q14*d1[0] + "vmul.f32 q1, q13, d0[1] \n\t" //q1 = q13*d0[1] + "vmul.f32 q0, q12, d0[0] \n\t" //q0 = q12*d0[0] + + "vst1.32 {d0, d1}, [%1] 
\n\t" //d0={nx,ny,nz,pad} + "add %1, %1, %2 \n\t" //q0 = {x,y,z,w} + "vst1.32 {d2, d3}, [%1] \n\t" //d2={nx,ny,nz,pad} + "add %1, %1, %2 \n\t" //q0 = {x,y,z,w} + "vst1.32 {d4, d5}, [%1] \n\t" //d4={nx,ny,nz,pad} + "add %1, %1, %2 \n\t" //q0 = {x,y,z,w} + "vst1.32 {d6, d7}, [%1] \n\t" //d6={nx,ny,nz,pad} + + : "+&r"(mtx), "+&r"(ptr) + : "I"(sizeof(SPVertex)), "I"(3 * sizeof(SPVertex)) + : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", + "d16","d17", "d18","d19", "d20", "d21", "d22", + "d23", "d24", "d25", "d26", "d27", "d28", "d29", + "d30", "d31", "memory" + ); +} + +static void gSPLightVertex4NEON(u32 v) +{ + volatile float result[16]; + + volatile int i = gSP.numLights; + volatile int tmp = 0; + volatile void *ptr0 = &(gSP.lights[0].r); + volatile void *ptr1 = &(OGL.triangles.vertices[v].nx); + volatile void *ptr2 = result; + volatile void *ptr3 = gSP.matrix.modelView[gSP.matrix.modelViewi]; + asm volatile ( + "vld1.32 {d0, d1}, [%1] \n\t" //q0 = {x,y,z,w} + "add %1, %1, %2 \n\t" //q0 = {x,y,z,w} + "vld1.32 {d2, d3}, [%1] \n\t" //q1 = {x,y,z,w} + "add %1, %1, %2 \n\t" //q0 = {x,y,z,w} + "vld1.32 {d4, d5}, [%1] \n\t" //q2 = {x,y,z,w} + "add %1, %1, %2 \n\t" //q0 = {x,y,z,w} + "vld1.32 {d6, d7}, [%1] \n\t" //q3 = {x,y,z,w} + "sub %1, %1, %3 \n\t" //q0 = {x,y,z,w} + + "vld1.32 {d18, d19}, [%0]! \n\t" //q9 = m + "vld1.32 {d20, d21}, [%0]! 
\n\t" //q10 = m+16 + "vld1.32 {d22, d23}, [%0] \n\t" //q11 = m+32 + + "vmul.f32 q12, q9, d0[0] \n\t" //q12 = q9*d0[0] + "vmul.f32 q13, q9, d2[0] \n\t" //q13 = q9*d2[0] + "vmul.f32 q14, q9, d4[0] \n\t" //q14 = q9*d4[0] + "vmul.f32 q15, q9, d6[0] \n\t" //q15 = q9*d6[0] + + "vmla.f32 q12, q10, d0[1] \n\t" //q12 += q10*q0[1] + "vmla.f32 q13, q10, d2[1] \n\t" //q13 += q10*q2[1] + "vmla.f32 q14, q10, d4[1] \n\t" //q14 += q10*q4[1] + "vmla.f32 q15, q10, d6[1] \n\t" //q15 += q10*q6[1] + + "vmla.f32 q12, q11, d1[0] \n\t" //q12 += q11*d1[0] + "vmla.f32 q13, q11, d3[0] \n\t" //q13 += q11*d3[0] + "vmla.f32 q14, q11, d5[0] \n\t" //q14 += q11*d5[0] + "vmla.f32 q15, q11, d7[0] \n\t" //q15 += q11*d7[0] + + "vmul.f32 q0, q12, q12 \n\t" //q0 = q12*q12 + "vmul.f32 q1, q13, q13 \n\t" //q1 = q13*q13 + "vmul.f32 q2, q14, q14 \n\t" //q2 = q14*q14 + "vmul.f32 q3, q15, q15 \n\t" //q3 = q15*q15 + + "vpadd.f32 d0, d0 \n\t" //d0[0] = d0[0] + d0[1] + "vpadd.f32 d2, d2 \n\t" //d2[0] = d2[0] + d2[1] + "vpadd.f32 d4, d4 \n\t" //d4[0] = d4[0] + d4[1] + "vpadd.f32 d6, d6 \n\t" //d6[0] = d6[0] + d6[1] + + "vmov.f32 s1, s2 \n\t" //d0[1] = d1[0] + "vmov.f32 s5, s6 \n\t" //d2[1] = d3[0] + "vmov.f32 s9, s10 \n\t" //d4[1] = d5[0] + "vmov.f32 s13, s14 \n\t" //d6[1] = d7[0] + + "vpadd.f32 d0, d0, d2 \n\t" //d0 = {d0[0] + d0[1], d2[0] + d2[1]} + "vpadd.f32 d1, d4, d6 \n\t" //d1 = {d4[0] + d4[1], d6[0] + d6[1]} + + "vmov.f32 q1, q0 \n\t" //q1 = q0 + "vrsqrte.f32 q0, q0 \n\t" //q0 = ~ 1.0 / sqrt(q0) + "vmul.f32 q2, q0, q1 \n\t" //q2 = q0 * q1 + "vrsqrts.f32 q3, q2, q0 \n\t" //q3 = (3 - q0 * q2) / 2 + "vmul.f32 q0, q0, q3 \n\t" //q0 = q0 * q3 + "vmul.f32 q2, q0, q1 \n\t" //q2 = q0 * q1 + "vrsqrts.f32 q3, q2, q0 \n\t" //q3 = (3 - q0 * q2) / 2 + "vmul.f32 q0, q0, q3 \n\t" //q0 = q0 * q3 + + "vmul.f32 q3, q15, d1[1] \n\t" //q3 = q15*d1[1] + "vmul.f32 q2, q14, d1[0] \n\t" //q2 = q14*d1[0] + "vmul.f32 q1, q13, d0[1] \n\t" //q1 = q13*d0[1] + "vmul.f32 q0, q12, d0[0] \n\t" //q0 = q12*d0[0] + + "vst1.32 {d0, d1}, [%1] 
\n\t" //d0={nx,ny,nz,pad} + "add %1, %1, %2 \n\t" //q0 = {x,y,z,w} + "vst1.32 {d2, d3}, [%1] \n\t" //d2={nx,ny,nz,pad} + "add %1, %1, %2 \n\t" //q0 = {x,y,z,w} + "vst1.32 {d4, d5}, [%1] \n\t" //d4={nx,ny,nz,pad} + "add %1, %1, %2 \n\t" //q0 = {x,y,z,w} + "vst1.32 {d6, d7}, [%1] \n\t" //d6={nx,ny,nz,pad} + + : "+&r"(ptr3), "+&r"(ptr1) + : "I"(sizeof(SPVertex)), "I"(3 * sizeof(SPVertex)) + : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", + "d16","d17", "d18","d19", "d20", "d21", "d22", + "d23", "d24", "d25", "d26", "d27", "d28", "d29", + "d30", "d31", "memory" + ); + asm volatile ( + + "mov %0, %5 \n\t" //r0=sizeof(light) + "mla %0, %1, %0, %2 \n\t" //r0=r1*r0+r2 + + "vmov.f32 q8, q0 \n\t" //q8=q0 + "vmov.f32 q9, q1 \n\t" //q9=q1 + "vmov.f32 q10, q2 \n\t" //q10=q2 + "vmov.f32 q11, q3 \n\t" //q11=q3 + + "vld1.32 {d0}, [%0] \n\t" //d0={r,g} + "flds s2, [%0, #8] \n\t" //d1[0]={b} + "vmov.f32 q1, q0 \n\t" //q1=q0 + "vmov.f32 q2, q0 \n\t" //q2=q0 + "vmov.f32 q3, q0 \n\t" //q3=q0 + + "vmov.f32 q15, #0.0 \n\t" //q15=0 + "vdup.f32 q15, d30[0] \n\t" //q15=d30[0] + + "cmp %1, #0 \n\t" // + "beq 2f \n\t" //(r1==0) goto 2 + + "1: \n\t" // + "vld1.32 {d8}, [%2]! \n\t" //d8={r,g} + "flds s18, [%2] \n\t" //q9[0]={b} + "add %2, %2, #4 \n\t" //q9[0]={b} + "vld1.32 {d10}, [%2]! 
\n\t" //d10={x,y} + "flds s22, [%2] \n\t" //d11[0]={z} + "add %2, %2, #4 \n\t" //q9[0]={b} + + "vmov.f32 q13, q5 \n\t" //q13 = q5 + "vmov.f32 q12, q4 \n\t" //q12 = q4 + + "vmul.f32 q4, q8, q13 \n\t" //q4 = q8*q13 + "vmul.f32 q5, q9, q13 \n\t" //q5 = q9*q13 + "vmul.f32 q6, q10, q13 \n\t" //q6 = q10*q13 + "vmul.f32 q7, q11, q13 \n\t" //q7 = q11*q13 + + "vpadd.f32 d8, d8 \n\t" //d8[0] = d8[0] + d8[1] + "vpadd.f32 d10, d10 \n\t" //d10[0] = d10[0] + d10[1] + "vpadd.f32 d12, d12 \n\t" //d12[0] = d12[0] + d12[1] + "vpadd.f32 d14, d14 \n\t" //d14[0] = d14[0] + d14[1] + + "vmov.f32 s17, s18 \n\t" //d8[1] = d9[0] + "vmov.f32 s21, s22 \n\t" //d10[1] = d11[0] + "vmov.f32 s25, s26 \n\t" //d12[1] = d13[0] + "vmov.f32 s29, s30 \n\t" //d14[1] = d15[0] + + "vpadd.f32 d8, d8, d10 \n\t" //d8 = {d8[0] + d8[1], d10[0] + d10[1]} + "vpadd.f32 d9, d12, d14 \n\t" //d9 = {d12[0] + d12[1], d14[0] + d14[1]} + + "vmax.f32 q4, q4, q15 \n\t" //q4=max(q4, 0) + + "vmla.f32 q0, q12, d8[0] \n\t" //q0 += + "vmla.f32 q1, q12, d8[1] \n\t" //d1 = {d4[0] + d4[1], d6[0] + d6[1]} + "vmla.f32 q2, q12, d9[0] \n\t" //d1 = {d4[0] + d4[1], d6[0] + d6[1]} + "vmla.f32 q3, q12, d9[1] \n\t" //d1 = {d4[0] + d4[1], d6[0] + d6[1]} + + "subs %1, %1, #1 \n\t" //r1=r1 - 1 + "bne 1b \n\t" //(r1!=0) goto 1 + + "2: \n\t" // + + "vmov.f32 q4, #1.0 \n\t" // + "vmin.f32 q0, q0, q4 \n\t" // + "vmin.f32 q1, q1, q4 \n\t" // + "vmin.f32 q2, q2, q4 \n\t" // + "vmin.f32 q3, q3, q4 \n\t" // + "vst1.32 {d0, d1}, [%4]! \n\t" // + "vst1.32 {d2, d3}, [%4]! \n\t" // + "vst1.32 {d4, d5}, [%4]! 
\n\t" // + "vst1.32 {d6, d7}, [%4] \n\t" // + + : "+&r"(tmp), "+&r"(i), "+&r"(ptr0), "+&r"(ptr1), "+&r"(ptr2) + : "I"(sizeof(SPLight)) + : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", + "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", + "d16","d17", "d18","d19", "d20", "d21", "d22", "d23", + "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31", + "memory", "cc" + ); + OGL.triangles.vertices[v].r = result[0]; + OGL.triangles.vertices[v].g = result[1]; + OGL.triangles.vertices[v].b = result[2]; + OGL.triangles.vertices[v+1].r = result[4]; + OGL.triangles.vertices[v+1].g = result[5]; + OGL.triangles.vertices[v+1].b = result[6]; + OGL.triangles.vertices[v+2].r = result[8]; + OGL.triangles.vertices[v+2].g = result[9]; + OGL.triangles.vertices[v+2].b = result[10]; + OGL.triangles.vertices[v+3].r = result[12]; + OGL.triangles.vertices[v+3].g = result[13]; + OGL.triangles.vertices[v+3].b = result[14]; +} + +static void gSPBillboardVertex4NEON(u32 v) +{ + int i = 0; + +#ifdef __TRIBUFFER_OPT + i = OGL.triangles.indexmap[0]; +#endif + + void *ptr0 = (void*)&OGL.triangles.vertices[v].x; + void *ptr1 = (void*)&OGL.triangles.vertices[i].x; + asm volatile ( + + "vld1.32 {d0, d1}, [%0] \n\t" //q0 = {x,y,z,w} + "add %0, %0, %2 \n\t" //q0 = {x,y,z,w} + "vld1.32 {d2, d3}, [%0] \n\t" //q1 = {x,y,z,w} + "add %0, %0, %2 \n\t" //q0 = {x,y,z,w} + "vld1.32 {d4, d5}, [%0] \n\t" //q2 = {x,y,z,w} + "add %0, %0, %2 \n\t" //q0 = {x,y,z,w} + "vld1.32 {d6, d7}, [%0] \n\t" //q3 = {x,y,z,w} + "sub %0, %0, %3 \n\t" //q0 = {x,y,z,w} + + "vld1.32 {d16, d17}, [%1] \n\t" //q2={x1,y1,z1,w1} + "vadd.f32 q0, q0, q8 \n\t" //q1=q1+q1 + "vadd.f32 q1, q1, q8 \n\t" //q1=q1+q1 + "vadd.f32 q2, q2, q8 \n\t" //q1=q1+q1 + "vadd.f32 q3, q3, q8 \n\t" //q1=q1+q1 + "vst1.32 {d0, d1}, [%0] \n\t" // + "add %0, %0, %2 \n\t" //q0 = {x,y,z,w} + "vst1.32 {d2, d3}, [%0] \n\t" // + "add %0, %0, %2 \n\t" //q0 = {x,y,z,w} + "vst1.32 {d4, d5}, [%0] \n\t" // + "add %0, %0, %2 \n\t" //q0 = {x,y,z,w} + "vst1.32 {d6, d7}, 
[%0] \n\t" // + : "+&r"(ptr0), "+&r"(ptr1) + : "I"(sizeof(SPVertex)), "I"(3 * sizeof(SPVertex)) + : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", + "d16", "d17", "memory" + ); +} +#endif + +static void gSPTransformVertexNEON(float vtx[4], float mtx[4][4]) +{ +//optimised using cycle analyser +#if 0 + volatile int tmp0, tmp1; + asm volatile ( + "vld1.32 {d0, d1}, [%3, :128] \n\t" //q0 = *v + "add %1, %0, #16 \n\t" //r1=r0+16 + "vld1.32 {d18, d19}, [%0, :128] \n\t" //q9 = m + "add %2, %0, #32 \n\t" //r2=r0+32 + "vld1.32 {d20, d21}, [%1, :128] \n\t" //q10 = m+4 + "add %0, %0, #48 \n\t" //r0=r0+48 + "vld1.32 {d22, d23}, [%2, :128] \n\t" //q11 = m+8 + "vld1.32 {d24, d25}, [%0, :128] \n\t" //q12 = m+12 + + "vmla.f32 q12, q9, d0[0] \n\t" //q12 = q12 + q9*Q0[0] + "vmul.f32 q13, q10, d0[1] \n\t" //q13 = Q10*Q0[1] + "vmul.f32 q14, q11, d1[0] \n\t" //q14 = Q11*Q0[2] + "vadd.f32 q12, q12, q13 \n\t" //q12 = q12 + q14 + "vadd.f32 q12, q12, q14 \n\t" //Q12 = q12 + q15 + + "vst1.32 {d24, d25}, [%3, :128] \n\t" //*v = q12 + + : "+r"(mtx), "+r"(tmp0), "+r"(tmp1) : "r"(vtx) + : "d0", "d1", "d18","d19","d20","d21","d22","d23","d24","d25", + "d26", "d27", "memory" + ); + +#else + asm volatile ( + "vld1.32 {d0, d1}, [%1] \n\t" //d8 = {x,y} + "vld1.32 {d18, d19}, [%0]! \n\t" //Q1 = m + "vld1.32 {d20, d21}, [%0]! \n\t" //Q2 = m+4 + "vld1.32 {d22, d23}, [%0]! 
\n\t" //Q3 = m+8 + "vld1.32 {d24, d25}, [%0] \n\t" //Q4 = m+12 + + "vmul.f32 q13, q9, d0[0] \n\t" //Q5 = Q1*Q0[0] + "vmla.f32 q13, q10, d0[1] \n\t" //Q5 += Q1*Q0[1] + "vmla.f32 q13, q11, d1[0] \n\t" //Q5 += Q2*Q0[2] + "vadd.f32 q13, q13, q12 \n\t" //Q5 += Q3*Q0[3] + "vst1.32 {d26, d27}, [%1] \n\t" //Q4 = m+12 + + : "+r"(mtx) : "r"(vtx) + : "d0", "d1", "d18","d19","d20","d21","d22","d23","d24","d25", + "d26", "d27", "memory" + ); +#endif +} +/* Lights vertex v. First asm block: multiplies the vertex normal by the first three rows of the current modelView matrix, normalises it (vrsqrte plus two vrsqrts refinement steps) and stores it back to the vertex. Second asm block: starts from the colour at lights[numLights], adds for each of the numLights lights its colour scaled by max(0, normal . light xyz), clamps to 1.0 and writes result[], which is then copied to the vertex r,g,b. The second block reads the normal from q1 as left by the first block, so nothing may be placed between them. NOTE(review): the #24 stride assumes sizeof(SPLight) == 24 (r,g,b,x,y,z floats) - confirm against the struct. */ +static void gSPLightVertexNEON(u32 v) +{ + volatile float result[4]; + + volatile int tmp = 0; + volatile int i = gSP.numLights; + volatile void *ptr0 = &gSP.lights[0].r; + volatile void *ptr1 = &OGL.triangles.vertices[v].nx; + volatile void *ptr2 = result; + volatile void *ptr3 = gSP.matrix.modelView[gSP.matrix.modelViewi]; + + asm volatile ( + "vld1.32 {d0, d1}, [%1] \n\t" //Q0 = v + "vld1.32 {d18, d19}, [%0]! \n\t" //q9 = m + "vld1.32 {d20, d21}, [%0]! \n\t" //q10 = m+4 + "vld1.32 {d22, d23}, [%0] \n\t" //q11 = m+8 + + "vmul.f32 q2, q9, d0[0] \n\t" //q2 = q9*Q0[0] + "vmla.f32 q2, q10, d0[1] \n\t" //q2 += q10*Q0[1] + "vmla.f32 q2, q11, d1[0] \n\t" //q2 += q11*Q0[2] + + "vmul.f32 d0, d4, d4 \n\t" //d0 = d4*d4 + "vpadd.f32 d0, d0, d0 \n\t" //d0 = d0[0] + d0[1] + "vmla.f32 d0, d5, d5 \n\t" //d0 = d0 + d5*d5 + + "vmov.f32 d1, d0 \n\t" //d1 = d0 + "vrsqrte.f32 d0, d0 \n\t" //d0 = ~ 1.0 / sqrt(d0) + "vmul.f32 d2, d0, d1 \n\t" //d2 = d0 * d1 + "vrsqrts.f32 d3, d2, d0 \n\t" //d3 = (3 - d0 * d2) / 2 + "vmul.f32 d0, d0, d3 \n\t" //d0 = d0 * d3 + "vmul.f32 d2, d0, d1 \n\t" //d2 = d0 * d1 + "vrsqrts.f32 d3, d2, d0 \n\t" //d3 = (3 - d0 * d2) / 2 + "vmul.f32 d0, d0, d3 \n\t" //d0 = d0 * d3 + + "vmul.f32 q1, q2, d0[0] \n\t" //q1 = q2*d0[0] + + "vst1.32 {d2, d3}, [%1] \n\t" //q1={nx,ny,nz,pad} + + : "+&r"(ptr3): "r"(ptr1) + : "d0","d1","d2","d3","d4","d5","d18","d19","d20","d21","d22", "d23", "memory" + ); + + asm volatile ( + "mov %0, #24 \n\t" //r0=24 + "mla %0, %1, %0, %2 \n\t" //r0=r1*r0+r2 + + "vld1.32 {d0}, [%0]! 
\n\t" //d0={r,g} + "flds s2, [%0] \n\t" //d1[0]={b} + "cmp %1, #0 \n\t" //r1 = light count (was testing the pointer in r0, which is never 0) + "beq 2f \n\t" //(r1==0) goto 2 + + "1: \n\t" // + "vld1.32 {d4}, [%2]! \n\t" //d4={r,g} + "flds s10, [%2] \n\t" //d5[0]={b} + "add %2, %2, #4 \n\t" //r2+=4 + "vld1.32 {d6}, [%2]! \n\t" //d6={x,y} + "flds s14, [%2] \n\t" //d7[0]={z} + "add %2, %2, #4 \n\t" //r2+=4 + "vmul.f32 d6, d2, d6 \n\t" //d6=d2*d6 + "vpadd.f32 d6, d6 \n\t" //d6=d6[0]+d6[1] + "vmla.f32 d6, d3, d7 \n\t" //d6=d6+d3*d7 + "vmov.f32 d7, #0.0 \n\t" //d7=0 + "vmax.f32 d6, d6, d7 \n\t" //d6=max(d6, d7) + "vmla.f32 q0, q2, d6[0] \n\t" //q0=q0+q2*d6[0] + "sub %1, %1, #1 \n\t" //r1=r1-1 + "cmp %1, #0 \n\t" //r1 > 0 ? + "bgt 1b \n\t" //(r1>0) ? goto 1 + "b 2f \n\t" //loop done + "2: \n\t" // + "vmov.f32 q1, #1.0 \n\t" //q1=1.0 + "vmin.f32 q0, q0, q1 \n\t" //q0=min(q0, 1.0) + "vst1.32 {d0, d1}, [%3] \n\t" //result[]=q0 + + : "+&r"(tmp), "+&r"(i), "+&r"(ptr0), "+&r"(ptr2) + :: "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", + "d16", "memory", "cc" + ); + OGL.triangles.vertices[v].r = result[0]; + OGL.triangles.vertices[v].g = result[1]; + OGL.triangles.vertices[v].b = result[2]; +} + +static void gSPBillboardVertexNEON(u32 v, u32 i) +{ + asm volatile ( + "vld1.32 {d2, d3}, [%0] \n\t" //q1={x0,y0, z0, w0} + "vld1.32 {d4, d5}, [%1] \n\t" //q2={x1,y1, z1, w1} + "vadd.f32 q1, q1, q2 \n\t" //q1=q1+q1 + "vst1.32 {d2, d3}, [%0] \n\t" // + :: "r"(&OGL.triangles.vertices[v].x), "r"(&OGL.triangles.vertices[i].x) + : "d2", "d3", "d4", "d5", "memory" + ); +} + +void gSPInitNeon() +{ +#ifdef __VEC4_OPT + gSPTransformVertex4 = gSPTransformVertex4NEON; + gSPTransformNormal4 = gSPTransformNormal4NEON; + gSPLightVertex4 = gSPLightVertex4NEON; + gSPBillboardVertex4 = gSPBillboardVertex4NEON; +#endif + gSPTransformVertex = gSPTransformVertexNEON; + gSPLightVertex = gSPLightVertexNEON; + gSPBillboardVertex = gSPBillboardVertexNEON; +} diff --git a/source/gles2n64/src/gles2N64.cpp b/source/gles2n64/src/gles2N64.cpp new file mode 100755 index 0000000..69ec8ed --- /dev/null +++ 
b/source/gles2n64/src/gles2N64.cpp @@ -0,0 +1,343 @@ + +#include +#include +//#include + +#include "m64p_types.h" +#include "m64p_plugin.h" + +#include "gles2N64.h" +#include "Debug.h" +#include "OpenGL.h" +#include "N64.h" +#include "RSP.h" +#include "RDP.h" +#include "VI.h" +#include "Config.h" +#include "Textures.h" +#include "ShaderCombiner.h" +#include "3DMath.h" +#include "FrameSkipper.h" +#include "ticks.h" + +//#include "ae_bridge.h" + +ptr_ConfigGetSharedDataFilepath ConfigGetSharedDataFilepath = NULL; + +static FrameSkipper frameSkipper; + +u32 last_good_ucode = (u32) -1; +void (*CheckInterrupts)( void ); +void (*renderCallback)() = NULL; + +extern "C" { + +EXPORT m64p_error CALL PluginStartup(m64p_dynlib_handle CoreLibHandle, + void *Context, void (*DebugCallback)(void *, int, const char *)) +{ +printf("GLES2N64 Plugin StartUp\n"); + ConfigGetSharedDataFilepath = (ptr_ConfigGetSharedDataFilepath) + dlsym(CoreLibHandle, "ConfigGetSharedDataFilepath"); + +#ifdef __NEON_OPT +/* if (android_getCpuFamily() == ANDROID_CPU_FAMILY_ARM && + (android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON) != 0)*/ + { + MathInitNeon(); + gSPInitNeon(); + } +#endif + return M64ERR_SUCCESS; +} + +EXPORT m64p_error CALL PluginShutdown(void) +{ +} + +EXPORT m64p_error CALL PluginGetVersion(m64p_plugin_type *PluginType, + int *PluginVersion, int *APIVersion, const char **PluginNamePtr, + int *Capabilities) +{ + /* set version info */ + if (PluginType != NULL) + *PluginType = M64PLUGIN_GFX; + + if (PluginVersion != NULL) + *PluginVersion = PLUGIN_VERSION; + + if (APIVersion != NULL) + *APIVersion = PLUGIN_API_VERSION; + + if (PluginNamePtr != NULL) + *PluginNamePtr = PLUGIN_NAME; + + if (Capabilities != NULL) + { + *Capabilities = 0; + } + + return M64ERR_SUCCESS; +} + +EXPORT void CALL ChangeWindow (void) +{ +} + +EXPORT void CALL MoveScreen (int xpos, int ypos) +{ +} + +EXPORT int CALL InitiateGFX (GFX_INFO Gfx_Info) +{ +printf("InitateGFX\n"); + DMEM = Gfx_Info.DMEM; + IMEM 
= Gfx_Info.IMEM; + RDRAM = Gfx_Info.RDRAM; + + REG.MI_INTR = (u32*) Gfx_Info.MI_INTR_REG; + REG.DPC_START = (u32*) Gfx_Info.DPC_START_REG; + REG.DPC_END = (u32*) Gfx_Info.DPC_END_REG; + REG.DPC_CURRENT = (u32*) Gfx_Info.DPC_CURRENT_REG; + REG.DPC_STATUS = (u32*) Gfx_Info.DPC_STATUS_REG; + REG.DPC_CLOCK = (u32*) Gfx_Info.DPC_CLOCK_REG; + REG.DPC_BUFBUSY = (u32*) Gfx_Info.DPC_BUFBUSY_REG; + REG.DPC_PIPEBUSY = (u32*) Gfx_Info.DPC_PIPEBUSY_REG; + REG.DPC_TMEM = (u32*) Gfx_Info.DPC_TMEM_REG; + + REG.VI_STATUS = (u32*) Gfx_Info.VI_STATUS_REG; + REG.VI_ORIGIN = (u32*) Gfx_Info.VI_ORIGIN_REG; + REG.VI_WIDTH = (u32*) Gfx_Info.VI_WIDTH_REG; + REG.VI_INTR = (u32*) Gfx_Info.VI_INTR_REG; + REG.VI_V_CURRENT_LINE = (u32*) Gfx_Info.VI_V_CURRENT_LINE_REG; + REG.VI_TIMING = (u32*) Gfx_Info.VI_TIMING_REG; + REG.VI_V_SYNC = (u32*) Gfx_Info.VI_V_SYNC_REG; + REG.VI_H_SYNC = (u32*) Gfx_Info.VI_H_SYNC_REG; + REG.VI_LEAP = (u32*) Gfx_Info.VI_LEAP_REG; + REG.VI_H_START = (u32*) Gfx_Info.VI_H_START_REG; + REG.VI_V_START = (u32*) Gfx_Info.VI_V_START_REG; + REG.VI_V_BURST = (u32*) Gfx_Info.VI_V_BURST_REG; + REG.VI_X_SCALE = (u32*) Gfx_Info.VI_X_SCALE_REG; + REG.VI_Y_SCALE = (u32*) Gfx_Info.VI_Y_SCALE_REG; + + CheckInterrupts = Gfx_Info.CheckInterrupts; + + Config_LoadConfig(); + Config_LoadRomConfig(Gfx_Info.HEADER); + + ticksInitialize(); + if( config.autoFrameSkip ) + frameSkipper.setSkips( FrameSkipper::AUTO, config.maxFrameSkip ); + else + frameSkipper.setSkips( FrameSkipper::MANUAL, config.maxFrameSkip ); + + OGL_Start(); + + return 1; +} +/* Counts the display list in OGL.frame_dl, then either runs it through RSP_ProcessDList and flags the frame for rendering, or - when the frame skipper says to skip - only raises the DP and SP interrupts and returns. */ +EXPORT void CALL ProcessDList(void) +{ + OGL.frame_dl++; + + if (frameSkipper.willSkipNext()) + { + OGL.frameSkipped++; + RSP.busy = FALSE; + RSP.DList++; + + /* avoid hang on frameskip: raise the DP and SP interrupts even though the list was not run */ + *REG.MI_INTR |= MI_INTR_DP; + CheckInterrupts(); + *REG.MI_INTR |= MI_INTR_SP; + CheckInterrupts(); + return; + } + + OGL.consecutiveSkips = 0; + RSP_ProcessDList(); + OGL.mustRenderDlist = true; +} + +EXPORT void CALL ProcessRDPList(void) +{ +} + +EXPORT void 
CALL ResizeVideoOutput(int Width, int Height) +{ +} + +EXPORT void CALL RomClosed (void) +{ + OGL_Stop(); // paulscode, OGL_Stop missing from Yongzh's code +} +/* Re-initialises the RSP and zeroes the frame counters (frame_prevdl starts at -1); the frame skipper targets 50 fps when config.romPAL is set, 60 fps otherwise. Always returns 1. */ +EXPORT int CALL RomOpen (void) +{ + RSP_Init(); + OGL.frame_vsync = 0; + OGL.frame_dl = 0; + OGL.frame_prevdl = -1; + OGL.mustRenderDlist = false; + + frameSkipper.setTargetFPS(config.romPAL ? 50 : 60); + return 1; +} + +EXPORT void CALL RomResumed(void) +{ + frameSkipper.start(); +} + +EXPORT void CALL ShowCFB (void) +{ +} +/* Updates the frame skipper, returns early if OGL.frame_dl has not changed since the previous call, and calls VI_UpdateScreen only when OGL.mustRenderDlist is set (the flag is cleared afterwards). */ +EXPORT void CALL UpdateScreen (void) +{ + frameSkipper.update(); + + //nothing to do if no display list has arrived since the last update + if (OGL.frame_prevdl == OGL.frame_dl) return; + + OGL.frame_prevdl = OGL.frame_dl; + + if (OGL.frame_dl > 0) OGL.frame_vsync++; + + if (OGL.mustRenderDlist) + { + OGL.screenUpdate=true; + VI_UpdateScreen(); + OGL.mustRenderDlist = false; + } +} + +EXPORT void CALL ViStatusChanged (void) +{ +} + +EXPORT void CALL ViWidthChanged (void) +{ +} + +/****************************************************************** + Function: FrameBufferRead + Purpose: This function is called to notify the dll that the + frame buffer memory is being read at the given address. + DLL should copy content from its render buffer to the frame buffer + in N64 RDRAM + DLL is responsible for maintaining its own frame buffer memory addr list + DLL should copy 4KB block content back to RDRAM frame buffer. 
+ Emulator should not call this function again if other memory + is read within the same 4KB range + + Since depth buffer is also being watched, the reported addr + may belong to depth buffer + input: addr rdram address + val val + size 1 = uint8, 2 = uint16, 4 = uint32 + output: none +*******************************************************************/ + +EXPORT void CALL FBRead(u32 addr) +{ +} + +/****************************************************************** + Function: FrameBufferWrite + Purpose: This function is called to notify the dll that the + frame buffer has been modified by the CPU at the given address. + + Since depth buffer is also being watched, the reported addr + may belong to depth buffer + + input: addr rdram address + val val + size 1 = uint8, 2 = uint16, 4 = uint32 + output: none +*******************************************************************/ + +EXPORT void CALL FBWrite(u32 addr, u32 size) +{ +} + +/************************************************************************ +Function: FBGetFrameBufferInfo +Purpose: This function is called by the emulator core to retrieve frame + buffer information from the video plugin in order to be able + to notify the video plugin about CPU frame buffer read/write + operations + + size: + = 1 byte + = 2 word (16 bit) <-- this is N64 default depth buffer format + = 4 dword (32 bit) + + when frame buffer information is not available yet, set all values + in the FrameBufferInfo structure to 0 + +input: FrameBufferInfo pinfo[6] + pinfo points to the FrameBufferInfo structures that are to be + filled in by this function +output: Values are returned in the FrameBufferInfo structure + Plugin can return up to 6 frame buffer info entries + ************************************************************************/ + +EXPORT void CALL FBGetFrameBufferInfo(void *p) +{ +} + +// paulscode, API changed this to "ReadScreen2" in Mupen64Plus 1.99.4 +EXPORT void CALL ReadScreen2(void *dest, int *width, int *height, int front) +{ +/* 
TODO: 'int front' was added in 1.99.4. What to do with this here? */ + OGL_ReadScreen(dest, width, height); +} + +EXPORT void CALL SetRenderingCallback(void (*callback)()) +{ + renderCallback = callback; +} + +EXPORT void CALL SetFrameSkipping(bool autoSkip, int maxSkips) +{ + frameSkipper.setSkips( + autoSkip ? FrameSkipper::AUTO : FrameSkipper::MANUAL, + maxSkips); +} + +EXPORT void CALL SetStretchVideo(bool stretch) +{ + config.stretchVideo = stretch; +} + +EXPORT void CALL StartGL() +{ + OGL_Start(); +} + +EXPORT void CALL StopGL() +{ + OGL_Stop(); +} +/* Fits the video into a width x height window. With config.stretchVideo set the whole window is used; otherwise the video keeps a height/width ratio of 0.75 (9/11 when config.romPAL is set), is shrunk to fit, and is centred via the x/y offsets passed to OGL_ResizeWindow. */ +EXPORT void CALL ResizeGL(int width, int height) +{ + const float ratio = (config.romPAL ? 9.0f/11.0f : 0.75f); + int videoWidth = width; + int videoHeight = height; + + if (!config.stretchVideo) { + videoWidth = (int) (height / ratio); + if (videoWidth > width) { + videoWidth = width; + videoHeight = (int) (width * ratio); + } + } + int x = (width - videoWidth) / 2; + int y = (height - videoHeight) / 2; + + OGL_ResizeWindow(x, y, videoWidth, videoHeight); +} + +} // extern "C" + diff --git a/source/gles2n64/src/gles2N64.h b/source/gles2n64/src/gles2N64.h new file mode 100644 index 0000000..044dcbe --- /dev/null +++ b/source/gles2n64/src/gles2N64.h @@ -0,0 +1,25 @@ +#ifndef GLN64_H +#define GLN64_H + +#include "m64p_config.h" +#include "stdio.h" + + +#ifndef min +#define min(a,b) ((a) < (b) ? 
(a) : (b)) +#endif + +//#define DEBUG + +#define PLUGIN_NAME "gles2n64" +#define PLUGIN_VERSION 0x000005 +#define PLUGIN_API_VERSION 0x020200 + +extern ptr_ConfigGetSharedDataFilepath ConfigGetSharedDataFilepath; + +extern void (*CheckInterrupts)( void ); +extern void (*renderCallback)(); + + +#endif + diff --git a/source/gles2n64/src/sdl2_compat.h b/source/gles2n64/src/sdl2_compat.h new file mode 100644 index 0000000..9f77089 --- /dev/null +++ b/source/gles2n64/src/sdl2_compat.h @@ -0,0 +1,783 @@ +/* + Simple DirectMedia Layer + Copyright (C) 1997-2012 Sam Lantinga + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. 
+*/ + +#include + +typedef struct +{ + Uint8 *src; + int src_w, src_h; + int src_pitch; + int src_skip; + Uint8 *dst; + int dst_w, dst_h; + int dst_pitch; + int dst_skip; + SDL_PixelFormat *src_fmt; + SDL_PixelFormat *dst_fmt; + Uint8 *table; + int flags; + Uint32 colorkey; + Uint8 r, g, b, a; +} SDL_BlitInfo; + +/* Blit mapping definition */ +typedef struct SDL_BlitMap +{ + SDL_Surface *dst; + int identity; + SDL_blit blit; + void *data; + SDL_BlitInfo info; + + /* the version count matches the destination; mismatch indicates + an invalid mapping */ + Uint32 dst_palette_version; + Uint32 src_palette_version; +} SDL_BlitMap; + +typedef struct SDL_VideoInfo +{ + Uint32 hw_available:1; + Uint32 wm_available:1; + Uint32 UnusedBits1:6; + Uint32 UnusedBits2:1; + Uint32 blit_hw:1; + Uint32 blit_hw_CC:1; + Uint32 blit_hw_A:1; + Uint32 blit_sw:1; + Uint32 blit_sw_CC:1; + Uint32 blit_sw_A:1; + Uint32 blit_fill:1; + Uint32 UnusedBits3:16; + Uint32 video_mem; + + SDL_PixelFormat *vfmt; + + int current_w; + int current_h; +} SDL_VideoInfo; + +#define SDL_ANYFORMAT 0x00100000 +#define SDL_HWPALETTE 0x00200000 +#define SDL_FULLSCREEN 0x00800000 +#define SDL_RESIZABLE 0x01000000 +#define SDL_NOFRAME 0x02000000 +#define SDL_OPENGL 0x04000000 +#define SDL_HWSURFACE 0x08000001 /**< \note Not used */ + +#define SDL_BUTTON_WHEELUP 4 +#define SDL_BUTTON_WHEELDOWN 5 + +int initialized_video = 0; + +static SDL_Window *SDL_VideoWindow = NULL; +static SDL_Surface *SDL_WindowSurface = NULL; +static SDL_Surface *SDL_VideoSurface = NULL; +static SDL_Surface *SDL_ShadowSurface = NULL; +static SDL_Surface *SDL_PublicSurface = NULL; +static SDL_Rect SDL_VideoViewport; +static char *wm_title = NULL; +static Uint32 SDL_VideoFlags = 0; +static SDL_GLContext *SDL_VideoContext = NULL; +static SDL_Surface *SDL_VideoIcon; + +static void +SDL_WM_SetCaption(const char *title, const char *icon) +{ + if (wm_title) { + SDL_free(wm_title); + } + if (title) { + wm_title = SDL_strdup(title); + } else { + 
wm_title = NULL; + } + SDL_SetWindowTitle(SDL_VideoWindow, wm_title); +} + +static int +GetVideoDisplay() +{ + const char *variable = SDL_getenv("SDL_VIDEO_FULLSCREEN_DISPLAY"); + if ( !variable ) { + variable = SDL_getenv("SDL_VIDEO_FULLSCREEN_HEAD"); + } + if ( variable ) { + return SDL_atoi(variable); + } else { + return 0; + } +} + +static const SDL_VideoInfo * +SDL_GetVideoInfo(void) +{ + static SDL_VideoInfo info; + SDL_DisplayMode mode; + + /* Memory leak, compatibility code, who cares? */ + if (!info.vfmt && SDL_GetDesktopDisplayMode(GetVideoDisplay(), &mode) == 0) { + info.vfmt = SDL_AllocFormat(mode.format); + info.current_w = mode.w; + info.current_h = mode.h; + } + return &info; +} + +static SDL_Rect ** +SDL_ListModes(const SDL_PixelFormat * format, Uint32 flags) +{ + int i, nmodes; + SDL_Rect **modes; + + if (!initialized_video) { + return NULL; + } + + if (!(flags & SDL_FULLSCREEN)) { + return (SDL_Rect **) (-1); + } + + if (!format) { + format = SDL_GetVideoInfo()->vfmt; + } + + /* Memory leak, but this is a compatibility function, who cares? 
*/ + nmodes = 0; + modes = NULL; + for (i = 0; i < SDL_GetNumDisplayModes(GetVideoDisplay()); ++i) { + SDL_DisplayMode mode; + int bpp; + + SDL_GetDisplayMode(GetVideoDisplay(), i, &mode); + if (!mode.w || !mode.h) { + return (SDL_Rect **) (-1); + } + + /* Copied from src/video/SDL_pixels.c:SDL_PixelFormatEnumToMasks */ + if (SDL_BYTESPERPIXEL(mode.format) <= 2) { + bpp = SDL_BITSPERPIXEL(mode.format); + } else { + bpp = SDL_BYTESPERPIXEL(mode.format) * 8; + } + + if (bpp != format->BitsPerPixel) { + continue; + } + if (nmodes > 0 && modes[nmodes - 1]->w == mode.w + && modes[nmodes - 1]->h == mode.h) { + continue; + } + + modes = (SDL_Rect**)SDL_realloc(modes, (nmodes + 2) * sizeof(*modes)); + if (!modes) { + return NULL; + } + modes[nmodes] = (SDL_Rect *) SDL_malloc(sizeof(SDL_Rect)); + if (!modes[nmodes]) { + return NULL; + } + modes[nmodes]->x = 0; + modes[nmodes]->y = 0; + modes[nmodes]->w = mode.w; + modes[nmodes]->h = mode.h; + ++nmodes; + } + if (modes) { + modes[nmodes] = NULL; + } + return modes; +} + +static void +SDL_UpdateRects(SDL_Surface * screen, int numrects, SDL_Rect * rects) +{ + int i; + + if (screen == SDL_ShadowSurface) { + for (i = 0; i < numrects; ++i) { + SDL_BlitSurface(SDL_ShadowSurface, &rects[i], SDL_VideoSurface, + &rects[i]); + } + + /* Fall through to video surface update */ + screen = SDL_VideoSurface; + } + if (screen == SDL_VideoSurface) { + if (SDL_VideoViewport.x || SDL_VideoViewport.y) { + SDL_Rect *stackrects = SDL_stack_alloc(SDL_Rect, numrects); + SDL_Rect *stackrect; + const SDL_Rect *rect; + + /* Offset all the rectangles before updating */ + for (i = 0; i < numrects; ++i) { + rect = &rects[i]; + stackrect = &stackrects[i]; + stackrect->x = SDL_VideoViewport.x + rect->x; + stackrect->y = SDL_VideoViewport.y + rect->y; + stackrect->w = rect->w; + stackrect->h = rect->h; + } + SDL_UpdateWindowSurfaceRects(SDL_VideoWindow, stackrects, numrects); + SDL_stack_free(stackrects); + } else { + 
SDL_UpdateWindowSurfaceRects(SDL_VideoWindow, rects, numrects); + } + } +} + +static void +SDL_UpdateRect(SDL_Surface * screen, Sint32 x, Sint32 y, Uint32 w, Uint32 h) +{ + if (screen) { + SDL_Rect rect; + + /* Fill the rectangle */ + rect.x = (int) x; + rect.y = (int) y; + rect.w = (int) (w ? w : screen->w); + rect.h = (int) (h ? h : screen->h); + SDL_UpdateRects(screen, 1, &rect); + } +} + +static int +SDL_Flip(SDL_Surface * screen) +{ + SDL_UpdateRect(screen, 0, 0, 0, 0); + return 0; +} + +/* + * Calculate the pad-aligned scanline width of a surface + */ +static int +SDL_CalculatePitch(SDL_Surface * surface) +{ + int pitch; + + /* Surface should be 4-byte aligned for speed */ + pitch = surface->w * surface->format->BytesPerPixel; + switch (surface->format->BitsPerPixel) { + case 1: + pitch = (pitch + 7) / 8; + break; + case 4: + pitch = (pitch + 1) / 2; + break; + default: + break; + } + pitch = (pitch + 3) & ~3; /* 4-byte aligning */ + return (pitch); +} + +static void +SDL_InvalidateMap(SDL_BlitMap * map) +{ + if (!map) { + return; + } + if (map->dst) { + /* Release our reference to the surface - see the note below */ + if (--map->dst->refcount <= 0) { + SDL_FreeSurface(map->dst); + } + } + map->dst = NULL; + map->src_palette_version = 0; + map->dst_palette_version = 0; + if (map->info.table) { + SDL_free(map->info.table); + map->info.table = NULL; + } +} + +static void +SDL_GL_SwapBuffers(void) +{ + SDL_GL_SwapWindow(SDL_VideoWindow); +} + +static int +SDL_WM_ToggleFullScreen(SDL_Surface * surface) +{ + int length; + void *pixels; + Uint8 *src, *dst; + int row; + int window_w; + int window_h; + + if (!SDL_PublicSurface) { + SDL_SetError("SDL_SetVideoMode() hasn't been called"); + return 0; + } + + /* Copy the old bits out */ + length = SDL_PublicSurface->w * SDL_PublicSurface->format->BytesPerPixel; + pixels = SDL_malloc(SDL_PublicSurface->h * length); + if (pixels && SDL_PublicSurface->pixels) { + src = (Uint8*)SDL_PublicSurface->pixels; + dst = 
(Uint8*)pixels; + for (row = 0; row < SDL_PublicSurface->h; ++row) { + SDL_memcpy(dst, src, length); + src += SDL_PublicSurface->pitch; + dst += length; + } + } + + /* Do the physical mode switch */ + if (SDL_GetWindowFlags(SDL_VideoWindow) & SDL_WINDOW_FULLSCREEN) { + if (SDL_SetWindowFullscreen(SDL_VideoWindow, 0) < 0) { + return 0; + } + SDL_PublicSurface->flags &= ~SDL_FULLSCREEN; + } else { + if (SDL_SetWindowFullscreen(SDL_VideoWindow, 1) < 0) { + return 0; + } + SDL_PublicSurface->flags |= SDL_FULLSCREEN; + } + + /* Recreate the screen surface */ + SDL_WindowSurface = SDL_GetWindowSurface(SDL_VideoWindow); + if (!SDL_WindowSurface) { + /* We're totally hosed... */ + return 0; + } + + /* Center the public surface in the window surface */ + SDL_GetWindowSize(SDL_VideoWindow, &window_w, &window_h); + SDL_VideoViewport.x = (window_w - SDL_VideoSurface->w)/2; + SDL_VideoViewport.y = (window_h - SDL_VideoSurface->h)/2; + SDL_VideoViewport.w = SDL_VideoSurface->w; + SDL_VideoViewport.h = SDL_VideoSurface->h; + + /* Do some shuffling behind the application's back if format changes */ + if (SDL_VideoSurface->format->format != SDL_WindowSurface->format->format) { + if (SDL_ShadowSurface) { + if (SDL_ShadowSurface->format->format == SDL_WindowSurface->format->format) { + /* Whee! We don't need a shadow surface anymore! 
*/ + SDL_VideoSurface->flags &= ~SDL_DONTFREE; + SDL_FreeSurface(SDL_VideoSurface); + SDL_free(SDL_ShadowSurface->pixels); + SDL_VideoSurface = SDL_ShadowSurface; + SDL_VideoSurface->flags |= SDL_PREALLOC; + SDL_ShadowSurface = NULL; + } else { + /* No problem, just change the video surface format */ + SDL_FreeFormat(SDL_VideoSurface->format); + SDL_VideoSurface->format = SDL_WindowSurface->format; + SDL_VideoSurface->format->refcount++; + SDL_InvalidateMap(SDL_ShadowSurface->map); + } + } else { + /* We can make the video surface the shadow surface */ + SDL_ShadowSurface = SDL_VideoSurface; + SDL_ShadowSurface->pitch = SDL_CalculatePitch(SDL_ShadowSurface); + SDL_ShadowSurface->pixels = SDL_malloc(SDL_ShadowSurface->h * SDL_ShadowSurface->pitch); + if (!SDL_ShadowSurface->pixels) { + /* Uh oh, we're hosed */ + SDL_ShadowSurface = NULL; + return 0; + } + SDL_ShadowSurface->flags &= ~SDL_PREALLOC; + + SDL_VideoSurface = SDL_CreateRGBSurfaceFrom(NULL, 0, 0, 32, 0, 0, 0, 0, 0); + SDL_VideoSurface->flags = SDL_ShadowSurface->flags; + SDL_VideoSurface->flags |= SDL_PREALLOC; + SDL_FreeFormat(SDL_VideoSurface->format); + SDL_VideoSurface->format = SDL_WindowSurface->format; + SDL_VideoSurface->format->refcount++; + SDL_VideoSurface->w = SDL_ShadowSurface->w; + SDL_VideoSurface->h = SDL_ShadowSurface->h; + } + } + + /* Update the video surface */ + SDL_VideoSurface->pitch = SDL_WindowSurface->pitch; + SDL_VideoSurface->pixels = (void *)((Uint8 *)SDL_WindowSurface->pixels + + SDL_VideoViewport.y * SDL_VideoSurface->pitch + + SDL_VideoViewport.x * SDL_VideoSurface->format->BytesPerPixel); + SDL_SetClipRect(SDL_VideoSurface, NULL); + + /* Copy the old bits back */ + if (pixels) { + src = (Uint8*)pixels; + dst = (Uint8*)SDL_PublicSurface->pixels; + for (row = 0; row < SDL_PublicSurface->h; ++row) { + SDL_memcpy(dst, src, length); + src += length; + dst += SDL_PublicSurface->pitch; + } + SDL_Flip(SDL_PublicSurface); + SDL_free(pixels); + } + + /* We're done! 
*/ + return 1; +} + +static void +ClearVideoSurface() +{ + if (SDL_ShadowSurface) { + SDL_FillRect(SDL_ShadowSurface, NULL, + SDL_MapRGB(SDL_ShadowSurface->format, 0, 0, 0)); + } + SDL_FillRect(SDL_WindowSurface, NULL, 0); + SDL_UpdateWindowSurface(SDL_VideoWindow); +} + +static int +SDL_ResizeVideoMode(int width, int height, int bpp, Uint32 flags) +{ + int w, h; + + /* We can't resize something we don't have... */ + if (!SDL_VideoSurface) { + return -1; + } + + /* We probably have to recreate the window in fullscreen mode */ + if (flags & SDL_FULLSCREEN) { + return -1; + } + + /* I don't think there's any change we can gracefully make in flags */ + if (flags != SDL_VideoFlags) { + return -1; + } + if (bpp != SDL_VideoSurface->format->BitsPerPixel) { + return -1; + } + + /* Resize the window */ + SDL_GetWindowSize(SDL_VideoWindow, &w, &h); + if (w != width || h != height) { + SDL_SetWindowSize(SDL_VideoWindow, width, height); + } + + /* If we're in OpenGL mode, just resize the stub surface and we're done! 
*/ + if (flags & SDL_OPENGL) { + SDL_VideoSurface->w = width; + SDL_VideoSurface->h = height; + return 0; + } + + SDL_WindowSurface = SDL_GetWindowSurface(SDL_VideoWindow); + if (!SDL_WindowSurface) { + return -1; + } + if (SDL_VideoSurface->format != SDL_WindowSurface->format) { + return -1; + } + SDL_VideoSurface->w = width; + SDL_VideoSurface->h = height; + SDL_VideoSurface->pixels = SDL_WindowSurface->pixels; + SDL_VideoSurface->pitch = SDL_WindowSurface->pitch; + SDL_SetClipRect(SDL_VideoSurface, NULL); + + if (SDL_ShadowSurface) { + SDL_ShadowSurface->w = width; + SDL_ShadowSurface->h = height; + SDL_ShadowSurface->pitch = SDL_CalculatePitch(SDL_ShadowSurface); + SDL_ShadowSurface->pixels = + SDL_realloc(SDL_ShadowSurface->pixels, + SDL_ShadowSurface->h * SDL_ShadowSurface->pitch); + SDL_SetClipRect(SDL_ShadowSurface, NULL); + SDL_InvalidateMap(SDL_ShadowSurface->map); + } else { + SDL_PublicSurface = SDL_VideoSurface; + } + + ClearVideoSurface(); + + return 0; +} + +static int +SDL_CompatEventFilter(void *userdata, SDL_Event * event) +{ + SDL_Event fake; + + switch (event->type) { + case SDL_WINDOWEVENT: + switch (event->window.event) { + case SDL_WINDOWEVENT_CLOSE: + fake.type = SDL_QUIT; + SDL_PushEvent(&fake); + break; + } + case SDL_TEXTINPUT: + { + /* FIXME: Generate an old style key repeat event if needed */ + //printf("TEXTINPUT: '%s'\n", event->text.text); + break; + } + case SDL_MOUSEMOTION: + { + event->motion.x -= SDL_VideoViewport.x; + event->motion.y -= SDL_VideoViewport.y; + break; + } + case SDL_MOUSEBUTTONDOWN: + case SDL_MOUSEBUTTONUP: + { + event->button.x -= SDL_VideoViewport.x; + event->button.y -= SDL_VideoViewport.y; + break; + } + case SDL_MOUSEWHEEL: + { + Uint8 button; + int x, y; + + if (event->wheel.y == 0) { + break; + } + + SDL_GetMouseState(&x, &y); + + if (event->wheel.y > 0) { + button = SDL_BUTTON_WHEELUP; + } else { + button = SDL_BUTTON_WHEELDOWN; + } + + fake.button.button = button; + fake.button.x = x; + fake.button.y = 
y; + fake.button.windowID = event->wheel.windowID; + + fake.type = SDL_MOUSEBUTTONDOWN; + fake.button.state = SDL_PRESSED; + SDL_PushEvent(&fake); + + fake.type = SDL_MOUSEBUTTONUP; + fake.button.state = SDL_RELEASED; + SDL_PushEvent(&fake); + break; + } + + } + return 1; +} + +static void +GetEnvironmentWindowPosition(int w, int h, int *x, int *y) +{ + int display = GetVideoDisplay(); + const char *window = SDL_getenv("SDL_VIDEO_WINDOW_POS"); + const char *center = SDL_getenv("SDL_VIDEO_CENTERED"); + if (window) { + if (SDL_sscanf(window, "%d,%d", x, y) == 2) { + return; + } + if (SDL_strcmp(window, "center") == 0) { + center = window; + } + } + if (center) { + *x = SDL_WINDOWPOS_CENTERED_DISPLAY(display); + *y = SDL_WINDOWPOS_CENTERED_DISPLAY(display); + } +} + +static SDL_Surface * +SDL_SetVideoMode(int width, int height, int bpp, Uint32 flags) +{ + SDL_DisplayMode desktop_mode; + int display = GetVideoDisplay(); + int window_x = SDL_WINDOWPOS_UNDEFINED_DISPLAY(display); + int window_y = SDL_WINDOWPOS_UNDEFINED_DISPLAY(display); + int window_w; + int window_h; + Uint32 window_flags; + Uint32 surface_flags; + + if (!initialized_video) { + if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_NOPARACHUTE) < 0) { + return NULL; + } + initialized_video = 1; + } + + SDL_GetDesktopDisplayMode(display, &desktop_mode); + + if (width == 0) { + width = desktop_mode.w; + } + if (height == 0) { + height = desktop_mode.h; + } + if (bpp == 0) { + bpp = SDL_BITSPERPIXEL(desktop_mode.format); + } + + /* See if we can simply resize the existing window and surface */ + if (SDL_ResizeVideoMode(width, height, bpp, flags) == 0) { + return SDL_PublicSurface; + } + + /* Destroy existing window */ + SDL_PublicSurface = NULL; + if (SDL_ShadowSurface) { + SDL_ShadowSurface->flags &= ~SDL_DONTFREE; + SDL_FreeSurface(SDL_ShadowSurface); + SDL_ShadowSurface = NULL; + } + if (SDL_VideoSurface) { + SDL_VideoSurface->flags &= ~SDL_DONTFREE; + SDL_FreeSurface(SDL_VideoSurface); + SDL_VideoSurface = NULL; + } 
+ if (SDL_VideoContext) { + /* SDL_GL_MakeCurrent(0, NULL); *//* Doesn't do anything */ + SDL_GL_DeleteContext(SDL_VideoContext); + SDL_VideoContext = NULL; + } + if (SDL_VideoWindow) { + SDL_GetWindowPosition(SDL_VideoWindow, &window_x, &window_y); + SDL_DestroyWindow(SDL_VideoWindow); + } + + /* Set up the event filter */ + if (!SDL_GetEventFilter(NULL, NULL)) { + SDL_SetEventFilter(SDL_CompatEventFilter, NULL); + } + + /* Create a new window */ + window_flags = SDL_WINDOW_SHOWN; + if (flags & SDL_FULLSCREEN) { + window_flags |= SDL_WINDOW_FULLSCREEN; + } + if (flags & SDL_OPENGL) { + window_flags |= SDL_WINDOW_OPENGL; + } + if (flags & SDL_RESIZABLE) { + window_flags |= SDL_WINDOW_RESIZABLE; + } + if (flags & SDL_NOFRAME) { + window_flags |= SDL_WINDOW_BORDERLESS; + } + GetEnvironmentWindowPosition(width, height, &window_x, &window_y); + SDL_VideoWindow = + SDL_CreateWindow(wm_title, window_x, window_y, width, height, + window_flags); + if (!SDL_VideoWindow) { + return NULL; + } + SDL_SetWindowIcon(SDL_VideoWindow, SDL_VideoIcon); + + window_flags = SDL_GetWindowFlags(SDL_VideoWindow); + surface_flags = 0; + if (window_flags & SDL_WINDOW_FULLSCREEN) { + surface_flags |= SDL_FULLSCREEN; + } + if ((window_flags & SDL_WINDOW_OPENGL) && (flags & SDL_OPENGL)) { + surface_flags |= SDL_OPENGL; + } + if (window_flags & SDL_WINDOW_RESIZABLE) { + surface_flags |= SDL_RESIZABLE; + } + if (window_flags & SDL_WINDOW_BORDERLESS) { + surface_flags |= SDL_NOFRAME; + } + + SDL_VideoFlags = flags; + + /* If we're in OpenGL mode, just create a stub surface and we're done! 
*/ + if (flags & SDL_OPENGL) { + SDL_VideoContext = (SDL_GLContext *)SDL_GL_CreateContext(SDL_VideoWindow); + if (!SDL_VideoContext) { + return NULL; + } + if (SDL_GL_MakeCurrent(SDL_VideoWindow, SDL_VideoContext) < 0) { + return NULL; + } + SDL_VideoSurface = + SDL_CreateRGBSurfaceFrom(NULL, width, height, bpp, 0, 0, 0, 0, 0); + if (!SDL_VideoSurface) { + return NULL; + } + SDL_VideoSurface->flags |= surface_flags; + SDL_PublicSurface = SDL_VideoSurface; + return SDL_PublicSurface; + } + + /* Create the screen surface */ + SDL_WindowSurface = SDL_GetWindowSurface(SDL_VideoWindow); + if (!SDL_WindowSurface) { + return NULL; + } + + /* Center the public surface in the window surface */ + SDL_GetWindowSize(SDL_VideoWindow, &window_w, &window_h); + SDL_VideoViewport.x = (window_w - width)/2; + SDL_VideoViewport.y = (window_h - height)/2; + SDL_VideoViewport.w = width; + SDL_VideoViewport.h = height; + + SDL_VideoSurface = SDL_CreateRGBSurfaceFrom(NULL, 0, 0, 32, 0, 0, 0, 0, 0); + SDL_VideoSurface->flags |= surface_flags; + SDL_VideoSurface->flags |= SDL_DONTFREE; + SDL_FreeFormat(SDL_VideoSurface->format); + SDL_VideoSurface->format = SDL_WindowSurface->format; + SDL_VideoSurface->format->refcount++; + SDL_VideoSurface->w = width; + SDL_VideoSurface->h = height; + SDL_VideoSurface->pitch = SDL_WindowSurface->pitch; + SDL_VideoSurface->pixels = (void *)((Uint8 *)SDL_WindowSurface->pixels + + SDL_VideoViewport.y * SDL_VideoSurface->pitch + + SDL_VideoViewport.x * SDL_VideoSurface->format->BytesPerPixel); + SDL_SetClipRect(SDL_VideoSurface, NULL); + + /* Create a shadow surface if necessary */ + if ((bpp != SDL_VideoSurface->format->BitsPerPixel) + && !(flags & SDL_ANYFORMAT)) { + SDL_ShadowSurface = + SDL_CreateRGBSurface(0, width, height, bpp, 0, 0, 0, 0); + if (!SDL_ShadowSurface) { + return NULL; + } + SDL_ShadowSurface->flags |= surface_flags; + SDL_ShadowSurface->flags |= SDL_DONTFREE; + + /* 8-bit SDL_ShadowSurface surfaces report that they have exclusive palette 
*/ + if (SDL_ShadowSurface->format->palette) { + SDL_ShadowSurface->flags |= SDL_HWPALETTE; + //TODO SDL_DitherColors(SDL_ShadowSurface->format->palette->colors, + // SDL_ShadowSurface->format->BitsPerPixel); + } + SDL_FillRect(SDL_ShadowSurface, NULL, + SDL_MapRGB(SDL_ShadowSurface->format, 0, 0, 0)); + } + SDL_PublicSurface = + (SDL_ShadowSurface ? SDL_ShadowSurface : SDL_VideoSurface); + + ClearVideoSurface(); + + /* We're finally done! */ + return SDL_PublicSurface; +} diff --git a/source/gles2n64/src/ticks.c b/source/gles2n64/src/ticks.c new file mode 100644 index 0000000..7819dcb --- /dev/null +++ b/source/gles2n64/src/ticks.c @@ -0,0 +1,35 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Copyright (C) 2011 yongzh (freeman.yong@gmail.com) * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
* + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +#include + +static struct timespec startTicks; + +void ticksInitialize() +{ + clock_gettime(CLOCK_MONOTONIC, &startTicks); +} + +unsigned int ticksGetTicks() +{ + struct timespec now; + clock_gettime(CLOCK_MONOTONIC, &now); + return (now.tv_sec - startTicks.tv_sec) * 1000 + + (now.tv_nsec - startTicks.tv_nsec) / 1000000; +} diff --git a/source/gles2n64/src/ticks.h b/source/gles2n64/src/ticks.h new file mode 100644 index 0000000..5960d19 --- /dev/null +++ b/source/gles2n64/src/ticks.h @@ -0,0 +1,34 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Copyright (C) 2011 yongzh (freeman.yong@gmail.com) * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
* + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +#ifndef EMUTICKS_H +#define EMUTICKS_H + +#ifdef __cplusplus +extern "C" { +#endif + +void ticksInitialize(); +unsigned int ticksGetTicks(); + +#ifdef __cplusplus +} +#endif +#endif + diff --git a/source/gles2n64/src/video_api_export.ver b/source/gles2n64/src/video_api_export.ver new file mode 100644 index 0000000..96bc0fc --- /dev/null +++ b/source/gles2n64/src/video_api_export.ver @@ -0,0 +1,28 @@ +{ global: +PluginStartup; +PluginShutdown; +PluginGetVersion; +ChangeWindow; +InitiateGFX; +MoveScreen; +ProcessDList; +ProcessRDPList; +RomClosed; +RomOpen; +RomResumed; +ShowCFB; +UpdateScreen; +ViStatusChanged; +ViWidthChanged; +ReadScreen2; +SetRenderingCallback; +ResizeVideoOutput; +SetFrameSkipping; +SetStretchVideo; +FBRead; +FBWrite; +FBGetFrameBufferInfo; +StartGL; +StopGL; +ResizeGL; +local: *; }; -- 2.39.2