endif
# base CFLAGS, LDLIBS, and LDFLAGS
-OPTFLAGS ?= -Ofast -ffast-math -flto -fuse-linker-plugin
+OPTFLAGS ?= -Ofast -ffast-math
+# LTO flags (-flto -fuse-linker-plugin) are no longer part of the default; override OPTFLAGS to re-enable them.
WARNFLAGS ?= -Wall
CFLAGS += $(OPTFLAGS) $(WARNFLAGS) -ffast-math -fno-strict-aliasing -fvisibility=hidden -I../../src -I../../src/Glitch64/inc -DGCC
CXXFLAGS += -fvisibility-inlines-hidden -std=gnu++0x
LDLIBS += -ldl
endif
ifeq ($(OS), OSX)
- # Select the proper SDK
- # Also, SDKs are stored in a different location since XCode 4.3
- OSX_SDK ?= $(shell sw_vers -productVersion | cut -f1 -f2 -d .)
- OSX_XCODEMAJ = $(shell xcodebuild -version | grep '[0-9]*\.[0-9]*' | cut -f2 -d ' ' | cut -f1 -d .)
- OSX_XCODEMIN = $(shell xcodebuild -version | grep '[0-9]*\.[0-9]*' | cut -f2 -d ' ' | cut -f2 -d .)
- OSX_XCODEGE43 = $(shell echo "`expr $(OSX_XCODEMAJ) \>= 4``expr $(OSX_XCODEMIN) \>= 3`")
- ifeq ($(OSX_XCODEGE43), 11)
- OSX_SYSROOT := /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs
- else
- OSX_SYSROOT := /Developer/SDKs
- endif
+ # xcode-select has been around since Xcode 3.0, i.e. OS X 10.5.
+ # Simple (:=) assignment is used so that xcode-select and ls run once, when
+ # this line is parsed, instead of every time the variables are expanded.
+ # NOTE(review): `ls | tail -1` takes the last entry in ls's sort order, which
+ # may not be the newest SDK (e.g. 10.9 vs 10.10) - confirm on multi-SDK hosts.
+ OSX_SDK_ROOT := $(shell xcode-select -print-path)/Platforms/MacOSX.platform/Developer/SDKs
+ OSX_SDK_PATH := $(OSX_SDK_ROOT)/$(shell ls $(OSX_SDK_ROOT) | tail -1)
CXXFLAGS += -std=c++11 -stdlib=libc++
ifeq ($(CPU), X86)
LDFLAGS += -bundle -L/opt/local/lib
LDLIBS += -ldl
ifeq ($(ARCH_DETECTED), 64BITS)
- CFLAGS += -pipe -arch x86_64 -mmacosx-version-min=$(OSX_SDK) -isysroot $(OSX_SYSROOT)/MacOSX$(OSX_SDK).sdk
+ # The C++ sources are built with -stdlib=libc++, which needs a deployment
+ # target of at least OS X 10.7, so 10.7 is the lowest usable minimum here.
+ CFLAGS += -pipe -arch x86_64 -mmacosx-version-min=10.7 -isysroot $(OSX_SDK_PATH)
else
- CFLAGS += -pipe -mmmx -msse -fomit-frame-pointer -arch i686 -mmacosx-version-min=$(OSX_SDK) -isysroot $(OSX_SYSROOT)/MacOSX$(OSX_SDK).sdk
+ # Same 10.7 minimum as the 64-bit branch (required by -stdlib=libc++).
+ CFLAGS += -pipe -mmmx -msse -fomit-frame-pointer -arch i686 -mmacosx-version-min=10.7 -isysroot $(OSX_SDK_PATH)
endif
endif
endif
CFLAGS += $(LIBPNG_CFLAGS)
LDLIBS += $(LIBPNG_LDLIBS)
-
# search for OpenGL libraries
ifeq ($(OS), OSX)
GL_LDLIBS = -framework OpenGL
$(SRCDIR)/GlideHQ/TxReSample.cpp \
$(SRCDIR)/GlideHQ/TxDbg.cpp \
$(SRCDIR)/GlideHQ/tc-1.1+/fxt1.c \
- $(SRCDIR)/GlideHQ/tc-1.1+/dxtn.c \
$(SRCDIR)/GlideHQ/tc-1.1+/wrapper.c \
$(SRCDIR)/GlideHQ/tc-1.1+/texstore.c
-CPPFLAGS += -DTEXTURE_FILTER # -DDUMP_CACHE
+CPPFLAGS += -DTEXTURE_FILTER -DDUMP_CACHE
LDLIBS += -lboost_filesystem$(BOOST_SUFFIX) -lboost_system$(BOOST_SUFFIX)
+
+ # DXTn compressor selection: unless TXCDXTN=1 is given, the bundled s2tc
+ # sources are compiled in; with TXCDXTN=1 only TXCDXTN_EXTERNAL is defined.
+ ifneq ($(TXCDXTN), 1)
+ SOURCE += $(SRCDIR)/GlideHQ/tc-1.1+/s2tc/s2tc_algorithm.cpp
+ SOURCE += $(SRCDIR)/GlideHQ/tc-1.1+/s2tc/s2tc_libtxc_dxtn.cpp
+ else
+ CPPFLAGS += -DTXCDXTN_EXTERNAL
+ endif
endif
ifeq ($(OS),MINGW)
@echo " PIC=(1|0) == Force enable/disable of position independent code"
@echo " POSTFIX=name == String added to the name of the the build (default: '')"
@echo " HIRES=(1|0) == Enables/Disables support for hires textures and texture filters (default: 1)"
+ @echo " TXCDXTN=(1|0) == Enable/Disable external txc_dxtn library (default: 0)"
@echo " Install Options:"
@echo " PREFIX=path == install/uninstall prefix (default: /usr/local)"
@echo " SHAREDIR=path == path to install shared data files (default: PREFIX/share/mupen64plus)"
rebuild: clean all
# build dependency files
-CFLAGS += -MD
+CFLAGS += -MD -MP
-include $(OBJECTS:.o=.d)
CXXFLAGS += $(CFLAGS)
#define PATH_MAX _MAX_PATH
#define stricmp _stricmp
#endif
+#ifndef PATH_MAX
+ #define PATH_MAX 4096
+#endif
FILE *ini;
int sectionstart;
#include <time.h>
#define PATH_MAX MAX_PATH
#endif
+#ifndef PATH_MAX
+ #define PATH_MAX 4096
+#endif
#include "osal_dynamiclib.h"
#ifdef TEXTURE_FILTER // Hiroshi Morii <koolsmoky@users.sourceforge.net>
#include <stdarg.h>
if (configDir)
{
SetConfigDir(configDir);
- CoreVideo_Init();
ReadSettings();
return M64ERR_SUCCESS;
}
lx = lc; \
}
-#if defined(__GNUC__)
- #define bswap32(x) __builtin_bswap32(x)
-#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
+#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
#include <stdlib.h>
#define bswap32(x) _byteswap_ulong(x)
#else
#define CONFIG_API_VERSION 0x020000
#define VIDEXT_API_VERSION 0x030000
+#ifdef __cplusplus
+extern "C" {
+#endif
void WriteLog(m64p_msg_level level, const char *msg, ...);
+#ifdef __cplusplus
+}
+#endif
//The Glide API originally used an integer to pick an enumerated resolution.
//To accomodate arbitrary resolutions, pack it into a 32-bit struct
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
- * Mupen64plus-core - osal/dynamiclib.h *
+ * Mupen64plus-video-glide64mk2 - osal_dynamiclib.h *
* Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ *
* Copyright (C) 2009 Richard Goedeken *
* *
#if !defined(OSAL_DYNAMICLIB_H)
#define OSAL_DYNAMICLIB_H
+#include "m64p_types.h"
+
#ifdef __cplusplus
extern "C" {
#endif
-#include "m64p_types.h"
+m64p_error osal_dynlib_open(m64p_dynlib_handle *pLibHandle, const char *pccLibraryPath);
void * osal_dynlib_getproc(m64p_dynlib_handle LibHandle, const char *pccProcedureName);
+m64p_error osal_dynlib_close(m64p_dynlib_handle LibHandle);
+
#ifdef __cplusplus
}
#endif
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
- * Mupen64plus-core - osal/dynamiclib_unix.c *
+ * Mupen64plus-video-glide64mk2 - osal_dynamiclib_unix.c *
* Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ *
* Copyright (C) 2009 Richard Goedeken *
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
#include <stdlib.h>
+#include <string.h>
#include <stdio.h>
#include <dlfcn.h>
#include "m64p_types.h"
+#include "m64p.h"
#include "osal_dynamiclib.h"
+/* Load the shared library at pccLibraryPath and store its handle in *pLibHandle.
+ *
+ * Returns M64ERR_INPUT_ASSERT when either argument is NULL,
+ * M64ERR_INPUT_NOT_FOUND when dlopen() fails, and M64ERR_SUCCESS otherwise.
+ */
+m64p_error osal_dynlib_open(m64p_dynlib_handle *pLibHandle, const char *pccLibraryPath)
+{
+    if (!pLibHandle || !pccLibraryPath)
+        return M64ERR_INPUT_ASSERT;
+
+    /* the dlopen() result is written to the caller's handle even when it is NULL */
+    *pLibHandle = dlopen(pccLibraryPath, RTLD_NOW);
+    if (*pLibHandle != NULL)
+        return M64ERR_SUCCESS;
+
+    /* Report the failure only when the path contains a directory separator (/);
+     * a bare library name failing to load is the expected outcome when
+     * Mupen64Plus is not installed, so that case stays silent. */
+    if (strchr(pccLibraryPath, '/') != NULL)
+        WriteLog(M64MSG_ERROR, "dlopen('%s') failed: %s", pccLibraryPath, dlerror());
+
+    return M64ERR_INPUT_NOT_FOUND;
+}
+
void * osal_dynlib_getproc(m64p_dynlib_handle LibHandle, const char *pccProcedureName)
{
if (pccProcedureName == NULL)
return dlsym(LibHandle, pccProcedureName);
}
+/* Unload a library handle obtained from osal_dynlib_open().
+ *
+ * Returns M64ERR_INPUT_ASSERT for a NULL handle, M64ERR_INTERNAL when
+ * dlclose() reports a failure (the dlerror() text is logged), and
+ * M64ERR_SUCCESS otherwise.
+ */
+m64p_error osal_dynlib_close(m64p_dynlib_handle LibHandle)
+{
+    /* dlclose() is only specified for handles returned by dlopen(); reject NULL
+     * up front, mirroring the argument check in osal_dynlib_open() */
+    if (LibHandle == NULL)
+        return M64ERR_INPUT_ASSERT;
+
+    if (dlclose(LibHandle) != 0)
+    {
+        WriteLog(M64MSG_ERROR, "dlclose() failed: %s", dlerror());
+        return M64ERR_INTERNAL;
+    }
+
+    return M64ERR_SUCCESS;
+}
+
#include "TxFilter.h"
#include "TextureFilters.h"
#include "TxDbg.h"
+#ifndef NO_FILTER_THREAD
#include <functional>
#include <thread>
+#endif
+#if defined(__MINGW32__)
+#define swprintf _snwprintf
+#endif
void TxFilter::clear()
{
#pragma warning(disable: 4786)
#endif
+#ifndef NO_FILTER_THREAD
#include <functional>
#include <thread>
+#endif
/* NOTE: The codes are not optimized. They can be made faster. */
/* get dxtn extensions */
_tx_compress_fxt1 = TxLoadLib::getInstance()->getfxtCompressTexFuncExt();
- _tx_compress_dxtn = TxLoadLib::getInstance()->getdxtCompressTexFuncExt();
+ _tx_compress_dxtn_rgba = TxLoadLib::getInstance()->getdxtCompressTexFuncExt();
}
boolean bRet = 0;
- if (_tx_compress_dxtn &&
+ if (_tx_compress_dxtn_rgba &&
srcwidth >= 4 && srcheight >= 4) {
/* compress to dxtn
* width and height must be larger than 4
unsigned int srcStride = (srcwidth * blkheight) << 2;
unsigned int destStride = dstRowStride * blkrow;
for (i = 0; i < numcore - 1; i++) {
- thrd[i] = new std::thread(std::bind(_tx_compress_dxtn,
+ thrd[i] = new std::thread(std::bind(_tx_compress_dxtn_rgba,
4,
srcwidth,
blkheight,
src += srcStride;
dest += destStride;
}
- thrd[i] = new std::thread(std::bind(_tx_compress_dxtn,
+ thrd[i] = new std::thread(std::bind(_tx_compress_dxtn_rgba,
4,
srcwidth,
srcheight - blkheight * i,
delete thrd[i];
}
} else {
- (*_tx_compress_dxtn)(4, /* comps: ARGB8888=4, RGB888=3 */
+ (*_tx_compress_dxtn_rgba)(4, /* comps: ARGB8888=4, RGB888=3 */
srcwidth, /* width */
srcheight, /* height */
src, /* source */
* others = 16 bytes per 4x4 texel */
}
#else
- (*_tx_compress_dxtn)(4, /* comps: ARGB8888=4, RGB888=3 */
+ (*_tx_compress_dxtn_rgba)(4, /* comps: ARGB8888=4, RGB888=3 */
srcwidth, /* width */
srcheight, /* height */
src, /* source */
int _numcore;
fxtCompressTexFuncExt _tx_compress_fxt1;
- dxtCompressTexFuncExt _tx_compress_dxtn;
+ dxtCompressTexFuncExt _tx_compress_dxtn_rgba;
/* fast optimized... well, sort of. */
void ARGB1555_ARGB8888(uint32* src, uint32* dst, int width, int height);
_dxtnlib = LoadLibrary("dxtn");
if (_dxtnlib) {
- if (!_tx_compress_dxtn)
- _tx_compress_dxtn = (dxtCompressTexFuncExt)DLSYM(_dxtnlib, "tx_compress_dxtn");
+ if (!_tx_compress_dxtn_rgba)
+ _tx_compress_dxtn_rgba = (dxtCompressTexFuncExt)DLSYM(_dxtnlib, "tx_compress_dxtn_rgba");
if (!_tx_compress_fxt1)
_tx_compress_fxt1 = (fxtCompressTexFuncExt)DLSYM(_dxtnlib, "fxt1_encode");
}
#else
- _tx_compress_dxtn = tx_compress_dxtn;
+ _tx_compress_dxtn_rgba = tx_compress_dxtn_rgba;
_tx_compress_fxt1 = fxt1_encode;
#endif
dxtCompressTexFuncExt
TxLoadLib::getdxtCompressTexFuncExt()
{
- return _tx_compress_dxtn;
+ return _tx_compress_dxtn_rgba;
}
#ifdef __cplusplus
extern "C"{
#endif
-void tx_compress_dxtn(int srccomps, int width, int height,
+void tx_compress_dxtn_rgba(int srccomps, int width, int height,
const void *source, int destformat, void *dest,
int destRowStride);
HMODULE _dxtnlib;
#endif
fxtCompressTexFuncExt _tx_compress_fxt1;
- dxtCompressTexFuncExt _tx_compress_dxtn;
+ dxtCompressTexFuncExt _tx_compress_dxtn_rgba;
TxLoadLib();
public:
static TxLoadLib* getInstance() {
+++ /dev/null
-/*
- * DXTn codec
- * Version: 1.1
- *
- * Copyright (C) 2004 Daniel Borca All Rights Reserved.
- *
- * this is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * this is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Make; see the file COPYING. If not, write to
- * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-/* Copyright (C) 2007 Hiroshi Morii <koolsmoky(at)users.sourceforge.net>
- * Added support for ARGB inputs, DXT3,5 workaround for ATI Radeons, and
- * YUV conversions to determine representative colors.
- */
-
-
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-
-#include <stdio.h>
-
-#include "types.h"
-#include "internal.h"
-#include "dxtn.h"
-
-
-/***************************************************************************\
- * DXTn encoder
- *
- * The encoder was built by reversing the decoder,
- * and is vaguely based on FXT1 codec. Note that this code
- * is merely a proof of concept, since it is highly UNoptimized!
-\***************************************************************************/
-
-
-#define MAX_COMP 4 /* ever needed maximum number of components in texel */
-#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
-#define N_TEXELS 16 /* number of texels in a block (always 16) */
-#define COLOR565(v) (word)((((v)[RCOMP] & 0xf8) << 8) | (((v)[GCOMP] & 0xfc) << 3) | ((v)[BCOMP] >> 3))
-
-
-static const int dxtn_color_tlat[2][4] = {
- { 0, 2, 3, 1 },
- { 0, 2, 1, 3 }
-};
-
-static const int dxtn_alpha_tlat[2][8] = {
- { 0, 2, 3, 4, 5, 6, 7, 1 },
- { 0, 2, 3, 4, 5, 1, 6, 7 }
-};
-
-
-static void
-dxt1_rgb_quantize (dword *cc, const byte *lines[], int comps)
-{
- float b, iv[MAX_COMP]; /* interpolation vector */
-
- dword hi; /* high doubleword */
- int color0, color1;
- int n_vect;
- const int n_comp = 3;
- int black = 0;
-
-#ifndef YUV
- int minSum = 2000; /* big enough */
-#else
- int minSum = 2000000;
-#endif
- int maxSum = -1; /* small enough */
- int minCol = 0; /* phoudoin: silent compiler! */
- int maxCol = 0; /* phoudoin: silent compiler! */
-
- byte input[N_TEXELS][MAX_COMP];
- int i, k, l;
-
- /* make the whole block opaque */
- /* we will NEVER reference ACOMP of any pixel */
-
- /* 4 texels each line */
-#ifndef ARGB
- for (l = 0; l < 4; l++) {
- for (k = 0; k < 4; k++) {
- for (i = 0; i < comps; i++) {
- input[k + l * 4][i] = *lines[l]++;
- }
- }
- }
-#else
- /* H.Morii - support for ARGB inputs */
- for (l = 0; l < 4; l++) {
- for (k = 0; k < 4; k++) {
- input[k + l * 4][2] = *lines[l]++;
- input[k + l * 4][1] = *lines[l]++;
- input[k + l * 4][0] = *lines[l]++;
- if (comps == 4) input[k + l * 4][3] = *lines[l]++;
- }
- }
-#endif
-
- /* Our solution here is to find the darkest and brightest colors in
- * the 4x4 tile and use those as the two representative colors.
- * There are probably better algorithms to use (histogram-based).
- */
- for (k = 0; k < N_TEXELS; k++) {
- int sum = 0;
-#ifndef YUV
- for (i = 0; i < n_comp; i++) {
- sum += input[k][i];
- }
-#else
- /* RGB to YUV conversion according to CCIR 601 specs
- * Y = 0.299R+0.587G+0.114B
- * U = 0.713(R - Y) = 0.500R-0.419G-0.081B
- * V = 0.564(B - Y) = -0.169R-0.331G+0.500B
- */
- sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];
-#endif
- if (minSum > sum) {
- minSum = sum;
- minCol = k;
- }
- if (maxSum < sum) {
- maxSum = sum;
- maxCol = k;
- }
- if (sum == 0) {
- black = 1;
- }
- }
-
- color0 = COLOR565(input[minCol]);
- color1 = COLOR565(input[maxCol]);
-
- if (color0 == color1) {
- /* we'll use 3-vector */
- cc[0] = color0 | (color1 << 16);
- hi = black ? -1 : 0;
- } else {
- if (black && ((color0 == 0) || (color1 == 0))) {
- /* we still can use 4-vector */
- black = 0;
- }
-
- if (black ^ (color0 <= color1)) {
- int aux;
- aux = color0;
- color0 = color1;
- color1 = aux;
- aux = minCol;
- minCol = maxCol;
- maxCol = aux;
- }
- n_vect = (color0 <= color1) ? 2 : 3;
-
- MAKEIVEC(n_vect, n_comp, iv, b, input[minCol], input[maxCol]);
-
- /* add in texels */
- cc[0] = color0 | (color1 << 16);
- hi = 0;
- for (k = N_TEXELS - 1; k >= 0; k--) {
- int texel = 3;
- int sum = 0;
- if (black) {
- for (i = 0; i < n_comp; i++) {
- sum += input[k][i];
- }
- }
- if (!black || sum) {
- /* interpolate color */
- CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
- texel = dxtn_color_tlat[black][texel];
- }
- /* add in texel */
- hi <<= 2;
- hi |= texel;
- }
- }
- cc[1] = hi;
-}
-
-
-static void
-dxt1_rgba_quantize (dword *cc, const byte *lines[], int comps)
-{
- float b, iv[MAX_COMP]; /* interpolation vector */
-
- dword hi; /* high doubleword */
- int color0, color1;
- int n_vect;
- const int n_comp = 3;
- int transparent = 0;
-
-#ifndef YUV
- int minSum = 2000; /* big enough */
-#else
- int minSum = 2000000;
-#endif
- int maxSum = -1; /* small enough */
- int minCol = 0; /* phoudoin: silent compiler! */
- int maxCol = 0; /* phoudoin: silent compiler! */
-
- byte input[N_TEXELS][MAX_COMP];
- int i, k, l;
-
- if (comps == 3) {
- /* make the whole block opaque */
- memset(input, -1, sizeof(input));
- }
-
- /* 4 texels each line */
-#ifndef ARGB
- for (l = 0; l < 4; l++) {
- for (k = 0; k < 4; k++) {
- for (i = 0; i < comps; i++) {
- input[k + l * 4][i] = *lines[l]++;
- }
- }
- }
-#else
- /* H.Morii - support for ARGB inputs */
- for (l = 0; l < 4; l++) {
- for (k = 0; k < 4; k++) {
- input[k + l * 4][2] = *lines[l]++;
- input[k + l * 4][1] = *lines[l]++;
- input[k + l * 4][0] = *lines[l]++;
- if (comps == 4) input[k + l * 4][3] = *lines[l]++;
- }
- }
-#endif
-
- /* Our solution here is to find the darkest and brightest colors in
- * the 4x4 tile and use those as the two representative colors.
- * There are probably better algorithms to use (histogram-based).
- */
- for (k = 0; k < N_TEXELS; k++) {
- int sum = 0;
-#ifndef YUV
- for (i = 0; i < n_comp; i++) {
- sum += input[k][i];
- }
-#else
- sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];
-#endif
- if (minSum > sum) {
- minSum = sum;
- minCol = k;
- }
- if (maxSum < sum) {
- maxSum = sum;
- maxCol = k;
- }
- if (input[k][ACOMP] < 128) {
- transparent = 1;
- }
- }
-
- color0 = COLOR565(input[minCol]);
- color1 = COLOR565(input[maxCol]);
-
- if (color0 == color1) {
- /* we'll use 3-vector */
- cc[0] = color0 | (color1 << 16);
- hi = transparent ? -1 : 0;
- } else {
- if (transparent ^ (color0 <= color1)) {
- int aux;
- aux = color0;
- color0 = color1;
- color1 = aux;
- aux = minCol;
- minCol = maxCol;
- maxCol = aux;
- }
- n_vect = (color0 <= color1) ? 2 : 3;
-
- MAKEIVEC(n_vect, n_comp, iv, b, input[minCol], input[maxCol]);
-
- /* add in texels */
- cc[0] = color0 | (color1 << 16);
- hi = 0;
- for (k = N_TEXELS - 1; k >= 0; k--) {
- int texel = 3;
- if (input[k][ACOMP] >= 128) {
- /* interpolate color */
- CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
- texel = dxtn_color_tlat[transparent][texel];
- }
- /* add in texel */
- hi <<= 2;
- hi |= texel;
- }
- }
- cc[1] = hi;
-}
-
-
-static void
-dxt3_rgba_quantize (dword *cc, const byte *lines[], int comps)
-{
- float b, iv[MAX_COMP]; /* interpolation vector */
-
- dword lolo, lohi; /* low quadword: lo dword, hi dword */
- dword hihi; /* high quadword: high dword */
- int color0, color1;
- const int n_vect = 3;
- const int n_comp = 3;
-
-#ifndef YUV
- int minSum = 2000; /* big enough */
-#else
- int minSum = 2000000;
-#endif
- int maxSum = -1; /* small enough */
- int minCol = 0; /* phoudoin: silent compiler! */
- int maxCol = 0; /* phoudoin: silent compiler! */
-
- byte input[N_TEXELS][MAX_COMP];
- int i, k, l;
-
- if (comps == 3) {
- /* make the whole block opaque */
- memset(input, -1, sizeof(input));
- }
-
- /* 4 texels each line */
-#ifndef ARGB
- for (l = 0; l < 4; l++) {
- for (k = 0; k < 4; k++) {
- for (i = 0; i < comps; i++) {
- input[k + l * 4][i] = *lines[l]++;
- }
- }
- }
-#else
- /* H.Morii - support for ARGB inputs */
- for (l = 0; l < 4; l++) {
- for (k = 0; k < 4; k++) {
- input[k + l * 4][2] = *lines[l]++;
- input[k + l * 4][1] = *lines[l]++;
- input[k + l * 4][0] = *lines[l]++;
- if (comps == 4) input[k + l * 4][3] = *lines[l]++;
- }
- }
-#endif
-
- /* Our solution here is to find the darkest and brightest colors in
- * the 4x4 tile and use those as the two representative colors.
- * There are probably better algorithms to use (histogram-based).
- */
- for (k = 0; k < N_TEXELS; k++) {
- int sum = 0;
-#ifndef YUV
- for (i = 0; i < n_comp; i++) {
- sum += input[k][i];
- }
-#else
- sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];
-#endif
- if (minSum > sum) {
- minSum = sum;
- minCol = k;
- }
- if (maxSum < sum) {
- maxSum = sum;
- maxCol = k;
- }
- }
-
- /* add in alphas */
- lolo = lohi = 0;
- for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
- /* add in alpha */
- lohi <<= 4;
- lohi |= input[k][ACOMP] >> 4;
- }
- cc[1] = lohi;
- for (; k >= 0; k--) {
- /* add in alpha */
- lolo <<= 4;
- lolo |= input[k][ACOMP] >> 4;
- }
- cc[0] = lolo;
-
- color0 = COLOR565(input[minCol]);
- color1 = COLOR565(input[maxCol]);
-
-#ifdef RADEON
- /* H.Morii - Workaround for ATI Radeon
- * According to the OpenGL EXT_texture_compression_s3tc specs,
- * the encoding of the RGB components for DXT3 and DXT5 formats
- * use the non-transparent encodings of DXT1 but treated as
- * though color0 > color1, regardless of the actual values of
- * color0 and color1. ATI Radeons however require the values to
- * be color0 > color1.
- */
- if (color0 < color1) {
- int aux;
- aux = color0;
- color0 = color1;
- color1 = aux;
- aux = minCol;
- minCol = maxCol;
- maxCol = aux;
- }
-#endif
-
- cc[2] = color0 | (color1 << 16);
-
- hihi = 0;
- if (color0 != color1) {
- MAKEIVEC(n_vect, n_comp, iv, b, input[minCol], input[maxCol]);
-
- /* add in texels */
- for (k = N_TEXELS - 1; k >= 0; k--) {
- int texel;
- /* interpolate color */
- CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
- texel = dxtn_color_tlat[0][texel];
- /* add in texel */
- hihi <<= 2;
- hihi |= texel;
- }
- }
- cc[3] = hihi;
-}
-
-
-static void
-dxt5_rgba_quantize (dword *cc, const byte *lines[], int comps)
-{
- float b, iv[MAX_COMP]; /* interpolation vector */
-
- qword lo; /* low quadword */
- dword hihi; /* high quadword: high dword */
- int color0, color1;
- const int n_vect = 3;
- const int n_comp = 3;
-
-#ifndef YUV
- int minSum = 2000; /* big enough */
-#else
- int minSum = 2000000;
-#endif
- int maxSum = -1; /* small enough */
- int minCol = 0; /* phoudoin: silent compiler! */
- int maxCol = 0; /* phoudoin: silent compiler! */
- int alpha0 = 2000; /* big enough */
- int alpha1 = -1; /* small enough */
- int anyZero = 0, anyOne = 0;
- int a_vect;
-
- byte input[N_TEXELS][MAX_COMP];
- int i, k, l;
-
- if (comps == 3) {
- /* make the whole block opaque */
- memset(input, -1, sizeof(input));
- }
-
- /* 4 texels each line */
-#ifndef ARGB
- for (l = 0; l < 4; l++) {
- for (k = 0; k < 4; k++) {
- for (i = 0; i < comps; i++) {
- input[k + l * 4][i] = *lines[l]++;
- }
- }
- }
-#else
- /* H.Morii - support for ARGB inputs */
- for (l = 0; l < 4; l++) {
- for (k = 0; k < 4; k++) {
- input[k + l * 4][2] = *lines[l]++;
- input[k + l * 4][1] = *lines[l]++;
- input[k + l * 4][0] = *lines[l]++;
- if (comps == 4) input[k + l * 4][3] = *lines[l]++;
- }
- }
-#endif
-
- /* Our solution here is to find the darkest and brightest colors in
- * the 4x4 tile and use those as the two representative colors.
- * There are probably better algorithms to use (histogram-based).
- */
- for (k = 0; k < N_TEXELS; k++) {
- int sum = 0;
-#ifndef YUV
- for (i = 0; i < n_comp; i++) {
- sum += input[k][i];
- }
-#else
- sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];
-#endif
- if (minSum > sum) {
- minSum = sum;
- minCol = k;
- }
- if (maxSum < sum) {
- maxSum = sum;
- maxCol = k;
- }
- if (alpha0 > input[k][ACOMP]) {
- alpha0 = input[k][ACOMP];
- }
- if (alpha1 < input[k][ACOMP]) {
- alpha1 = input[k][ACOMP];
- }
- if (input[k][ACOMP] == 0) {
- anyZero = 1;
- }
- if (input[k][ACOMP] == 255) {
- anyOne = 1;
- }
- }
-
- /* add in alphas */
- if (alpha0 == alpha1) {
- /* we'll use 6-vector */
- cc[0] = alpha0 | (alpha1 << 8);
- cc[1] = 0;
- } else {
- if (anyZero && ((alpha0 == 0) || (alpha1 == 0))) {
- /* we still might use 8-vector */
- anyZero = 0;
- }
- if (anyOne && ((alpha0 == 255) || (alpha1 == 255))) {
- /* we still might use 8-vector */
- anyOne = 0;
- }
- if ((anyZero | anyOne) ^ (alpha0 <= alpha1)) {
- int aux;
- aux = alpha0;
- alpha0 = alpha1;
- alpha1 = aux;
- }
- a_vect = (alpha0 <= alpha1) ? 5 : 7;
-
- /* compute interpolation vector */
- iv[ACOMP] = (float)a_vect / (alpha1 - alpha0);
- b = -iv[ACOMP] * alpha0 + 0.5F;
-
- /* add in alphas */
- Q_MOV32(lo, 0);
- for (k = N_TEXELS - 1; k >= 0; k--) {
- int texel = -1;
- if (anyZero | anyOne) {
- if (input[k][ACOMP] == 0) {
- texel = 6;
- } else if (input[k][ACOMP] == 255) {
- texel = 7;
- }
- }
- /* interpolate alpha */
- if (texel == -1) {
- float dot = input[k][ACOMP] * iv[ACOMP];
- texel = (int)(dot + b);
-#if SAFECDOT
- if (texel < 0) {
- texel = 0;
- } else if (texel > a_vect) {
- texel = a_vect;
- }
-#endif
- texel = dxtn_alpha_tlat[anyZero | anyOne][texel];
- }
- /* add in texel */
- Q_SHL(lo, 3);
- Q_OR32(lo, texel);
- }
- Q_SHL(lo, 16);
- Q_OR32(lo, alpha0 | (alpha1 << 8));
- ((qword *)cc)[0] = lo;
- }
-
- color0 = COLOR565(input[minCol]);
- color1 = COLOR565(input[maxCol]);
-
-#ifdef RADEON /* H.Morii - Workaround for ATI Radeon */
- if (color0 < color1) {
- int aux;
- aux = color0;
- color0 = color1;
- color1 = aux;
- aux = minCol;
- minCol = maxCol;
- maxCol = aux;
- }
-#endif
-
- cc[2] = color0 | (color1 << 16);
-
- hihi = 0;
- if (color0 != color1) {
- MAKEIVEC(n_vect, n_comp, iv, b, input[minCol], input[maxCol]);
-
- /* add in texels */
- for (k = N_TEXELS - 1; k >= 0; k--) {
- int texel;
- /* interpolate color */
- CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
- texel = dxtn_color_tlat[0][texel];
- /* add in texel */
- hihi <<= 2;
- hihi |= texel;
- }
- }
- cc[3] = hihi;
-}
-
-
-#define ENCODER(dxtn, n) \
-int TAPIENTRY \
-dxtn##_encode (int width, int height, int comps, \
- const void *source, int srcRowStride, \
- void *dest, int destRowStride) \
-{ \
- int x, y; \
- const byte *data; \
- dword *encoded = (dword *)dest; \
- void *newSource = NULL; \
- \
- /* Replicate image if width is not M4 or height is not M4 */ \
- if ((width & 3) | (height & 3)) { \
- int newWidth = (width + 3) & ~3; \
- int newHeight = (height + 3) & ~3; \
- newSource = malloc(comps * newWidth * newHeight * sizeof(byte *));\
- _mesa_upscale_teximage2d(width, height, newWidth, newHeight, \
- comps, (const byte *)source, \
- srcRowStride, (byte *)newSource); \
- source = newSource; \
- width = newWidth; \
- height = newHeight; \
- srcRowStride = comps * newWidth; \
- } \
- \
- data = (const byte *)source; \
- destRowStride = (destRowStride - width * n) / 4; \
- for (y = 0; y < height; y += 4) { \
- unsigned int offs = 0 + (y + 0) * srcRowStride; \
- for (x = 0; x < width; x += 4) { \
- const byte *lines[4]; \
- lines[0] = &data[offs]; \
- lines[1] = lines[0] + srcRowStride; \
- lines[2] = lines[1] + srcRowStride; \
- lines[3] = lines[2] + srcRowStride; \
- offs += 4 * comps; \
- dxtn##_quantize(encoded, lines, comps); \
- /* 4x4 block */ \
- encoded += n; \
- } \
- encoded += destRowStride; \
- } \
- \
- if (newSource != NULL) { \
- free(newSource); \
- } \
- \
- return 0; \
-}
-
-ENCODER(dxt1_rgb, 2)
-ENCODER(dxt1_rgba, 2)
-ENCODER(dxt3_rgba, 4)
-ENCODER(dxt5_rgba, 4)
-
-
-/***************************************************************************\
- * DXTn decoder
- *
- * The decoder is based on GL_EXT_texture_compression_s3tc
- * specification and serves as a concept for the encoder.
-\***************************************************************************/
-
-
-/* lookup table for scaling 4 bit colors up to 8 bits */
-static const byte _rgb_scale_4[] = {
- 0, 17, 34, 51, 68, 85, 102, 119,
- 136, 153, 170, 187, 204, 221, 238, 255
-};
-
-/* lookup table for scaling 5 bit colors up to 8 bits */
-static const byte _rgb_scale_5[] = {
- 0, 8, 16, 25, 33, 41, 49, 58,
- 66, 74, 82, 90, 99, 107, 115, 123,
- 132, 140, 148, 156, 165, 173, 181, 189,
- 197, 206, 214, 222, 230, 239, 247, 255
-};
-
-/* lookup table for scaling 6 bit colors up to 8 bits */
-static const byte _rgb_scale_6[] = {
- 0, 4, 8, 12, 16, 20, 24, 28,
- 32, 36, 40, 45, 49, 53, 57, 61,
- 65, 69, 73, 77, 81, 85, 89, 93,
- 97, 101, 105, 109, 113, 117, 121, 125,
- 130, 134, 138, 142, 146, 150, 154, 158,
- 162, 166, 170, 174, 178, 182, 186, 190,
- 194, 198, 202, 206, 210, 215, 219, 223,
- 227, 231, 235, 239, 243, 247, 251, 255
-};
-
-
-#define CC_SEL(cc, which) (((dword *)(cc))[(which) / 32] >> ((which) & 31))
-#define UP4(c) _rgb_scale_4[(c) & 15]
-#define UP5(c) _rgb_scale_5[(c) & 31]
-#define UP6(c) _rgb_scale_6[(c) & 63]
-#define ZERO_4UBV(v) *((dword *)(v)) = 0
-
-
-void TAPIENTRY
-dxt1_rgb_decode_1 (const void *texture, int stride,
- int i, int j, byte *rgba)
-{
- const byte *src = (const byte *)texture
- + ((j / 4) * ((stride + 3) / 4) + i / 4) * 8;
- const int code = (src[4 + (j & 3)] >> ((i & 3) * 2)) & 0x3;
- if (code == 0) {
- rgba[RCOMP] = UP5(CC_SEL(src, 11));
- rgba[GCOMP] = UP6(CC_SEL(src, 5));
- rgba[BCOMP] = UP5(CC_SEL(src, 0));
- } else if (code == 1) {
- rgba[RCOMP] = UP5(CC_SEL(src, 27));
- rgba[GCOMP] = UP6(CC_SEL(src, 21));
- rgba[BCOMP] = UP5(CC_SEL(src, 16));
- } else {
- const word col0 = src[0] | (src[1] << 8);
- const word col1 = src[2] | (src[3] << 8);
- if (col0 > col1) {
- if (code == 2) {
- rgba[RCOMP] = (UP5(col0 >> 11) * 2 + UP5(col1 >> 11)) / 3;
- rgba[GCOMP] = (UP6(col0 >> 5) * 2 + UP6(col1 >> 5)) / 3;
- rgba[BCOMP] = (UP5(col0 ) * 2 + UP5(col1 )) / 3;
- } else {
- rgba[RCOMP] = (UP5(col0 >> 11) + 2 * UP5(col1 >> 11)) / 3;
- rgba[GCOMP] = (UP6(col0 >> 5) + 2 * UP6(col1 >> 5)) / 3;
- rgba[BCOMP] = (UP5(col0 ) + 2 * UP5(col1 )) / 3;
- }
- } else {
- if (code == 2) {
- rgba[RCOMP] = (UP5(col0 >> 11) + UP5(col1 >> 11)) / 2;
- rgba[GCOMP] = (UP6(col0 >> 5) + UP6(col1 >> 5)) / 2;
- rgba[BCOMP] = (UP5(col0 ) + UP5(col1 )) / 2;
- } else {
- ZERO_4UBV(rgba);
- }
- }
- }
- rgba[ACOMP] = 255;
-}
-
-
-void TAPIENTRY
-dxt1_rgba_decode_1 (const void *texture, int stride,
- int i, int j, byte *rgba)
-{
- /* Same as rgb_dxt1 above, except alpha=0 if col0<=col1 and code=3. */
- const byte *src = (const byte *)texture
- + ((j / 4) * ((stride + 3) / 4) + i / 4) * 8;
- const int code = (src[4 + (j & 3)] >> ((i & 3) * 2)) & 0x3;
- if (code == 0) {
- rgba[RCOMP] = UP5(CC_SEL(src, 11));
- rgba[GCOMP] = UP6(CC_SEL(src, 5));
- rgba[BCOMP] = UP5(CC_SEL(src, 0));
- rgba[ACOMP] = 255;
- } else if (code == 1) {
- rgba[RCOMP] = UP5(CC_SEL(src, 27));
- rgba[GCOMP] = UP6(CC_SEL(src, 21));
- rgba[BCOMP] = UP5(CC_SEL(src, 16));
- rgba[ACOMP] = 255;
- } else {
- const word col0 = src[0] | (src[1] << 8);
- const word col1 = src[2] | (src[3] << 8);
- if (col0 > col1) {
- if (code == 2) {
- rgba[RCOMP] = (UP5(col0 >> 11) * 2 + UP5(col1 >> 11)) / 3;
- rgba[GCOMP] = (UP6(col0 >> 5) * 2 + UP6(col1 >> 5)) / 3;
- rgba[BCOMP] = (UP5(col0 ) * 2 + UP5(col1 )) / 3;
- } else {
- rgba[RCOMP] = (UP5(col0 >> 11) + 2 * UP5(col1 >> 11)) / 3;
- rgba[GCOMP] = (UP6(col0 >> 5) + 2 * UP6(col1 >> 5)) / 3;
- rgba[BCOMP] = (UP5(col0 ) + 2 * UP5(col1 )) / 3;
- }
- rgba[ACOMP] = 255;
- } else {
- if (code == 2) {
- rgba[RCOMP] = (UP5(col0 >> 11) + UP5(col1 >> 11)) / 2;
- rgba[GCOMP] = (UP6(col0 >> 5) + UP6(col1 >> 5)) / 2;
- rgba[BCOMP] = (UP5(col0 ) + UP5(col1 )) / 2;
- rgba[ACOMP] = 255;
- } else {
- ZERO_4UBV(rgba);
- }
- }
- }
-}
-
-
-void TAPIENTRY
-dxt3_rgba_decode_1 (const void *texture, int stride,
- int i, int j, byte *rgba)
-{
- const byte *src = (const byte *)texture
- + ((j / 4) * ((stride + 3) / 4) + i / 4) * 16;
- const int code = (src[12 + (j & 3)] >> ((i & 3) * 2)) & 0x3;
- const dword *cc = (const dword *)(src + 8);
- if (code == 0) {
- rgba[RCOMP] = UP5(CC_SEL(cc, 11));
- rgba[GCOMP] = UP6(CC_SEL(cc, 5));
- rgba[BCOMP] = UP5(CC_SEL(cc, 0));
- } else if (code == 1) {
- rgba[RCOMP] = UP5(CC_SEL(cc, 27));
- rgba[GCOMP] = UP6(CC_SEL(cc, 21));
- rgba[BCOMP] = UP5(CC_SEL(cc, 16));
- } else if (code == 2) {
- /* (col0 * (4 - code) + col1 * (code - 1)) / 3 */
- rgba[RCOMP] = (UP5(CC_SEL(cc, 11)) * 2 + UP5(CC_SEL(cc, 27))) / 3;
- rgba[GCOMP] = (UP6(CC_SEL(cc, 5)) * 2 + UP6(CC_SEL(cc, 21))) / 3;
- rgba[BCOMP] = (UP5(CC_SEL(cc, 0)) * 2 + UP5(CC_SEL(cc, 16))) / 3;
- } else {
- rgba[RCOMP] = (UP5(CC_SEL(cc, 11)) + 2 * UP5(CC_SEL(cc, 27))) / 3;
- rgba[GCOMP] = (UP6(CC_SEL(cc, 5)) + 2 * UP6(CC_SEL(cc, 21))) / 3;
- rgba[BCOMP] = (UP5(CC_SEL(cc, 0)) + 2 * UP5(CC_SEL(cc, 16))) / 3;
- }
- rgba[ACOMP] = UP4(src[((j & 3) * 4 + (i & 3)) / 2] >> ((i & 1) * 4));
-}
-
-
-void TAPIENTRY
-dxt5_rgba_decode_1 (const void *texture, int stride,
- int i, int j, byte *rgba)
-{
- const byte *src = (const byte *)texture
- + ((j / 4) * ((stride + 3) / 4) + i / 4) * 16;
- const int code = (src[12 + (j & 3)] >> ((i & 3) * 2)) & 0x3;
- const dword *cc = (const dword *)(src + 8);
- const byte alpha0 = src[0];
- const byte alpha1 = src[1];
- const int alphaShift = (((j & 3) * 4) + (i & 3)) * 3 + 16;
- const int acode = ((alphaShift == 31)
- ? CC_SEL(src + 2, alphaShift - 16)
- : CC_SEL(src, alphaShift)) & 0x7;
- if (code == 0) {
- rgba[RCOMP] = UP5(CC_SEL(cc, 11));
- rgba[GCOMP] = UP6(CC_SEL(cc, 5));
- rgba[BCOMP] = UP5(CC_SEL(cc, 0));
- } else if (code == 1) {
- rgba[RCOMP] = UP5(CC_SEL(cc, 27));
- rgba[GCOMP] = UP6(CC_SEL(cc, 21));
- rgba[BCOMP] = UP5(CC_SEL(cc, 16));
- } else if (code == 2) {
- /* (col0 * (4 - code) + col1 * (code - 1)) / 3 */
- rgba[RCOMP] = (UP5(CC_SEL(cc, 11)) * 2 + UP5(CC_SEL(cc, 27))) / 3;
- rgba[GCOMP] = (UP6(CC_SEL(cc, 5)) * 2 + UP6(CC_SEL(cc, 21))) / 3;
- rgba[BCOMP] = (UP5(CC_SEL(cc, 0)) * 2 + UP5(CC_SEL(cc, 16))) / 3;
- } else {
- rgba[RCOMP] = (UP5(CC_SEL(cc, 11)) + 2 * UP5(CC_SEL(cc, 27))) / 3;
- rgba[GCOMP] = (UP6(CC_SEL(cc, 5)) + 2 * UP6(CC_SEL(cc, 21))) / 3;
- rgba[BCOMP] = (UP5(CC_SEL(cc, 0)) + 2 * UP5(CC_SEL(cc, 16))) / 3;
- }
- if (acode == 0) {
- rgba[ACOMP] = alpha0;
- } else if (acode == 1) {
- rgba[ACOMP] = alpha1;
- } else if (alpha0 > alpha1) {
- rgba[ACOMP] = ((8 - acode) * alpha0 + (acode - 1) * alpha1) / 7;
- } else if (acode == 6) {
- rgba[ACOMP] = 0;
- } else if (acode == 7) {
- rgba[ACOMP] = 255;
- } else {
- rgba[ACOMP] = ((6 - acode) * alpha0 + (acode - 1) * alpha1) / 5;
- }
-}
+++ /dev/null
-/*
- * DXTn codec
- * Version: 1.1
- *
- * Copyright (C) 2004 Daniel Borca All Rights Reserved.
- *
- * this is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * this is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Make; see the file COPYING. If not, write to
- * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-
-#ifndef DXTN_H_included
-#define DXTN_H_included
-
-TAPI int TAPIENTRY
-dxt1_rgb_encode (int width, int height, int comps,
- const void *source, int srcRowStride,
- void *dest, int destRowStride);
-
-TAPI int TAPIENTRY
-dxt1_rgba_encode (int width, int height, int comps,
- const void *source, int srcRowStride,
- void *dest, int destRowStride);
-
-TAPI int TAPIENTRY
-dxt3_rgba_encode (int width, int height, int comps,
- const void *source, int srcRowStride,
- void *dest, int destRowStride);
-
-TAPI int TAPIENTRY
-dxt5_rgba_encode (int width, int height, int comps,
- const void *source, int srcRowStride,
- void *dest, int destRowStride);
-
-TAPI void TAPIENTRY
-dxt1_rgb_decode_1 (const void *texture, int stride /* in pixels */,
- int i, int j, byte *rgba);
-
-TAPI void TAPIENTRY
-dxt1_rgba_decode_1 (const void *texture, int stride /* in pixels */,
- int i, int j, byte *rgba);
-
-TAPI void TAPIENTRY
-dxt3_rgba_decode_1 (const void *texture, int stride /* in pixels */,
- int i, int j, byte *rgba);
-
-TAPI void TAPIENTRY
-dxt5_rgba_decode_1 (const void *texture, int stride /* in pixels */,
- int i, int j, byte *rgba);
-
-#endif
/*
- * FXT1 codec
- * Version: 1.1
+ * Mesa 3-D graphics library
*
- * Copyright (C) 2004 Daniel Borca All Rights Reserved.
+ * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * DANIEL BORCA BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
*/
-/* Copyright (C) 2007 Hiroshi Morii <koolsmoky(at)users.sourceforge.net>
- * Added support for ARGB inputs.
+/**
+ * \file texcompress_fxt1.c
+ * GL_3DFX_texture_compression_FXT1 support.
*/
#include <stdlib.h>
#include <string.h>
+#include <assert.h>
#include "types.h"
#include "internal.h"
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
#define ISTBLACK(v) (*((dword *)(v)) == 0)
-#define COPY_4UBV(DST, SRC) *((dword *)(DST)) = *((dword *)(SRC))
static int
fxt1_bestcol (float vec[][MAX_COMP], int nv,
- byte input[MAX_COMP], int nc)
+ byte input[MAX_COMP], int nc)
{
- int i, j, best = -1;
- float err = 1e9; /* big enough */
-
- for (j = 0; j < nv; j++) {
- float e = 0.0F;
- for (i = 0; i < nc; i++) {
- e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
- }
- if (e < err) {
- err = e;
- best = j;
- }
- }
-
- return best;
+ int i, j, best = -1;
+ float err = 1e9; /* big enough */
+
+ for (j = 0; j < nv; j++) {
+ float e = 0.0F;
+ for (i = 0; i < nc; i++) {
+ e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
+ }
+ if (e < err) {
+ err = e;
+ best = j;
+ }
+ }
+
+ return best;
}
static int
fxt1_worst (float vec[MAX_COMP],
- byte input[N_TEXELS][MAX_COMP], int nc, int n)
+ byte input[N_TEXELS][MAX_COMP], int nc, int n)
{
- int i, k, worst = -1;
- float err = -1.0F; /* small enough */
-
- for (k = 0; k < n; k++) {
- float e = 0.0F;
- for (i = 0; i < nc; i++) {
- e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
- }
- if (e > err) {
- err = e;
- worst = k;
- }
- }
-
- return worst;
+ int i, k, worst = -1;
+ float err = -1.0F; /* small enough */
+
+ for (k = 0; k < n; k++) {
+ float e = 0.0F;
+ for (i = 0; i < nc; i++) {
+ e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
+ }
+ if (e > err) {
+ err = e;
+ worst = k;
+ }
+ }
+
+ return worst;
}
static int
fxt1_variance (double variance[MAX_COMP],
- byte input[N_TEXELS][MAX_COMP], int nc, int n)
+ byte input[N_TEXELS][MAX_COMP], int nc, int n)
{
- int i, k, best = 0;
- dword sx, sx2;
- double var, maxvar = -1; /* small enough */
- double teenth = 1.0 / n;
-
- for (i = 0; i < nc; i++) {
- sx = sx2 = 0;
- for (k = 0; k < n; k++) {
- int t = input[k][i];
- sx += t;
- sx2 += t * t;
- }
- var = sx2 * teenth - sx * sx * teenth * teenth;
- if (maxvar < var) {
- maxvar = var;
- best = i;
- }
- if (variance) {
- variance[i] = var;
- }
- }
-
- return best;
+ int i, k, best = 0;
+ int sx, sx2;
+ double var, maxvar = -1; /* small enough */
+ double teenth = 1.0 / n;
+
+ for (i = 0; i < nc; i++) {
+ sx = sx2 = 0;
+ for (k = 0; k < n; k++) {
+ int t = input[k][i];
+ sx += t;
+ sx2 += t * t;
+ }
+ var = sx2 * teenth - sx * sx * teenth * teenth;
+ if (maxvar < var) {
+ maxvar = var;
+ best = i;
+ }
+ if (variance) {
+ variance[i] = var;
+ }
+ }
+
+ return best;
}
static int
fxt1_choose (float vec[][MAX_COMP], int nv,
- byte input[N_TEXELS][MAX_COMP], int nc, int n)
+ byte input[N_TEXELS][MAX_COMP], int nc, int n)
{
#if 0
- /* Choose colors from a grid.
- */
- int i, j;
-
- for (j = 0; j < nv; j++) {
- int m = j * (n - 1) / (nv - 1);
- for (i = 0; i < nc; i++) {
- vec[j][i] = input[m][i];
- }
- }
+ /* Choose colors from a grid.
+ */
+ int i, j;
+
+ for (j = 0; j < nv; j++) {
+ int m = j * (n - 1) / (nv - 1);
+ for (i = 0; i < nc; i++) {
+ vec[j][i] = input[m][i];
+ }
+ }
#else
- /* Our solution here is to find the darkest and brightest colors in
- * the 8x4 tile and use those as the two representative colors.
- * There are probably better algorithms to use (histogram-based).
- */
- int i, j, k;
-#ifndef YUV
- int minSum = 2000; /* big enough */
-#else
- int minSum = 2000000;
-#endif
- int maxSum = -1; /* small enough */
- int minCol = 0; /* phoudoin: silent compiler! */
- int maxCol = 0; /* phoudoin: silent compiler! */
-
- struct {
- int flag;
- dword key;
- int freq;
- int idx;
- } hist[N_TEXELS];
- int lenh = 0;
-
- memset(hist, 0, sizeof(hist));
-
- for (k = 0; k < n; k++) {
- int l;
- dword key = 0;
- int sum = 0;
- for (i = 0; i < nc; i++) {
- key <<= 8;
- key |= input[k][i];
-#ifndef YUV
- sum += input[k][i];
-#else
- /* RGB to YUV conversion according to CCIR 601 specs
- * Y = 0.299R+0.587G+0.114B
- * U = 0.713(R - Y) = 0.500R-0.419G-0.081B
- * V = 0.564(B - Y) = -0.169R-0.331G+0.500B
- */
- sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];
-#endif
- }
- for (l = 0; l < n; l++) {
- if (!hist[l].flag) {
- /* alloc new slot */
- hist[l].flag = !0;
- hist[l].key = key;
- hist[l].freq = 1;
- hist[l].idx = k;
- lenh = l + 1;
- break;
- } else if (hist[l].key == key) {
- hist[l].freq++;
- break;
- }
- }
- if (minSum > sum) {
- minSum = sum;
- minCol = k;
- }
- if (maxSum < sum) {
- maxSum = sum;
- maxCol = k;
- }
- }
-
- if (lenh <= nv) {
- for (j = 0; j < lenh; j++) {
- for (i = 0; i < nc; i++) {
- vec[j][i] = (float)input[hist[j].idx][i];
- }
- }
- for (; j < nv; j++) {
- for (i = 0; i < nc; i++) {
- vec[j][i] = vec[0][i];
- }
- }
- return 0;
- }
-
- for (j = 0; j < nv; j++) {
- for (i = 0; i < nc; i++) {
- vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (float)(nv - 1);
- }
- }
+ /* Our solution here is to find the darkest and brightest colors in
+ * the 8x4 tile and use those as the two representative colors.
+ * There are probably better algorithms to use (histogram-based).
+ */
+ int i, j, k;
+ int minSum = 2000; /* big enough */
+ int maxSum = -1; /* small enough */
+ int minCol = 0; /* phoudoin: silent compiler! */
+ int maxCol = 0; /* phoudoin: silent compiler! */
+
+ struct {
+ int flag;
+ int key;
+ int freq;
+ int idx;
+ } hist[N_TEXELS];
+ int lenh = 0;
+
+ memset(hist, 0, sizeof(hist));
+
+ for (k = 0; k < n; k++) {
+ int l;
+ int key = 0;
+ int sum = 0;
+ for (i = 0; i < nc; i++) {
+ key <<= 8;
+ key |= input[k][i];
+ sum += input[k][i];
+ }
+ for (l = 0; l < n; l++) {
+ if (!hist[l].flag) {
+ /* alloc new slot */
+ hist[l].flag = !0;
+ hist[l].key = key;
+ hist[l].freq = 1;
+ hist[l].idx = k;
+ lenh = l + 1;
+ break;
+ } else if (hist[l].key == key) {
+ hist[l].freq++;
+ break;
+ }
+ }
+ if (minSum > sum) {
+ minSum = sum;
+ minCol = k;
+ }
+ if (maxSum < sum) {
+ maxSum = sum;
+ maxCol = k;
+ }
+ }
+
+ if (lenh <= nv) {
+ for (j = 0; j < lenh; j++) {
+ for (i = 0; i < nc; i++) {
+ vec[j][i] = (float)input[hist[j].idx][i];
+ }
+ }
+ for (; j < nv; j++) {
+ for (i = 0; i < nc; i++) {
+ vec[j][i] = vec[0][i];
+ }
+ }
+ return 0;
+ }
+
+ for (j = 0; j < nv; j++) {
+ for (i = 0; i < nc; i++) {
+ vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (float)(nv - 1);
+ }
+ }
#endif
- return !0;
+ return !0;
}
static int
fxt1_lloyd (float vec[][MAX_COMP], int nv,
- byte input[N_TEXELS][MAX_COMP], int nc, int n)
+ byte input[N_TEXELS][MAX_COMP], int nc, int n)
{
- /* Use the generalized lloyd's algorithm for VQ:
- * find 4 color vectors.
- *
- * for each sample color
- * sort to nearest vector.
- *
- * replace each vector with the centroid of it's matching colors.
- *
- * repeat until RMS doesn't improve.
- *
- * if a color vector has no samples, or becomes the same as another
- * vector, replace it with the color which is farthest from a sample.
- *
- * vec[][MAX_COMP] initial vectors and resulting colors
- * nv number of resulting colors required
- * input[N_TEXELS][MAX_COMP] input texels
- * nc number of components in input / vec
- * n number of input samples
- */
-
- int sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
- int cnt[MAX_VECT]; /* how many times a certain vector was chosen */
- float error, lasterror = 1e9;
-
- int i, j, k, rep;
-
- /* the quantizer */
- for (rep = 0; rep < LL_N_REP; rep++) {
- /* reset sums & counters */
- for (j = 0; j < nv; j++) {
- for (i = 0; i < nc; i++) {
- sum[j][i] = 0;
- }
- cnt[j] = 0;
- }
- error = 0;
-
- /* scan whole block */
- for (k = 0; k < n; k++) {
+ /* Use the generalized lloyd's algorithm for VQ:
+ * find 4 color vectors.
+ *
+ * for each sample color
+ * sort to nearest vector.
+ *
+ * replace each vector with the centroid of its matching colors.
+ *
+ * repeat until RMS doesn't improve.
+ *
+ * if a color vector has no samples, or becomes the same as another
+ * vector, replace it with the color which is farthest from a sample.
+ *
+ * vec[][MAX_COMP] initial vectors and resulting colors
+ * nv number of resulting colors required
+ * input[N_TEXELS][MAX_COMP] input texels
+ * nc number of components in input / vec
+ * n number of input samples
+ */
+
+ int sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
+ int cnt[MAX_VECT]; /* how many times a certain vector was chosen */
+ float error, lasterror = 1e9;
+
+ int i, j, k, rep;
+
+ /* the quantizer */
+ for (rep = 0; rep < LL_N_REP; rep++) {
+ /* reset sums & counters */
+ for (j = 0; j < nv; j++) {
+ for (i = 0; i < nc; i++) {
+ sum[j][i] = 0;
+ }
+ cnt[j] = 0;
+ }
+ error = 0;
+
+ /* scan whole block */
+ for (k = 0; k < n; k++) {
#if 1
- int best = -1;
- float err = 1e9; /* big enough */
- /* determine best vector */
- for (j = 0; j < nv; j++) {
- float e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
- (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
- (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
- if (nc == 4) {
- e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
- }
- if (e < err) {
- err = e;
- best = j;
- }
- }
+ int best = -1;
+ float err = 1e9; /* big enough */
+ /* determine best vector */
+ for (j = 0; j < nv; j++) {
+ float e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
+ (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
+ (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
+ if (nc == 4) {
+ e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
+ }
+ if (e < err) {
+ err = e;
+ best = j;
+ }
+ }
#else
- int best = fxt1_bestcol(vec, nv, input[k], nc, &err);
+ int best = fxt1_bestcol(vec, nv, input[k], nc, &err);
#endif
- /* add in closest color */
- for (i = 0; i < nc; i++) {
- sum[best][i] += input[k][i];
- }
- /* mark this vector as used */
- cnt[best]++;
- /* accumulate error */
- error += err;
- }
-
- /* check RMS */
- if ((error < LL_RMS_E) ||
- ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
- return !0; /* good match */
- }
- lasterror = error;
-
- /* move each vector to the barycenter of its closest colors */
- for (j = 0; j < nv; j++) {
- if (cnt[j]) {
- float div = 1.0F / cnt[j];
- for (i = 0; i < nc; i++) {
- vec[j][i] = div * sum[j][i];
- }
- } else {
- /* this vec has no samples or is identical with a previous vec */
- int worst = fxt1_worst(vec[j], input, nc, n);
- for (i = 0; i < nc; i++) {
- vec[j][i] = input[worst][i];
- }
- }
- }
- }
-
- return 0; /* could not converge fast enough */
+ assert(best >= 0);
+ /* add in closest color */
+ for (i = 0; i < nc; i++) {
+ sum[best][i] += input[k][i];
+ }
+ /* mark this vector as used */
+ cnt[best]++;
+ /* accumulate error */
+ error += err;
+ }
+
+ /* check RMS */
+ if ((error < LL_RMS_E) ||
+ ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
+ return !0; /* good match */
+ }
+ lasterror = error;
+
+ /* move each vector to the barycenter of its closest colors */
+ for (j = 0; j < nv; j++) {
+ if (cnt[j]) {
+ float div = 1.0F / cnt[j];
+ for (i = 0; i < nc; i++) {
+ vec[j][i] = div * sum[j][i];
+ }
+ } else {
+ /* this vec has no samples or is identical with a previous vec */
+ int worst = fxt1_worst(vec[j], input, nc, n);
+ for (i = 0; i < nc; i++) {
+ vec[j][i] = input[worst][i];
+ }
+ }
+ }
+ }
+
+ return 0; /* could not converge fast enough */
}
static void
fxt1_quantize_CHROMA (dword *cc,
- byte input[N_TEXELS][MAX_COMP])
+ byte input[N_TEXELS][MAX_COMP])
{
- const int n_vect = 4; /* 4 base vectors to find */
- const int n_comp = 3; /* 3 components: R, G, B */
- float vec[MAX_VECT][MAX_COMP];
- int i, j, k;
- qword hi; /* high quadword */
- dword lohi, lolo; /* low quadword: hi dword, lo dword */
-
- if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
- fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
- }
-
- Q_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
- for (j = n_vect - 1; j >= 0; j--) {
- for (i = 0; i < n_comp; i++) {
- /* add in colors */
- Q_SHL(hi, 5);
- Q_OR32(hi, (dword)(vec[j][i] / 8.0F));
- }
- }
- ((qword *)cc)[1] = hi;
-
- lohi = lolo = 0;
- /* right microtile */
- for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
- lohi <<= 2;
- lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
- }
- /* left microtile */
- for (; k >= 0; k--) {
- lolo <<= 2;
- lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
- }
- cc[1] = lohi;
- cc[0] = lolo;
+ const int n_vect = 4; /* 4 base vectors to find */
+ const int n_comp = 3; /* 3 components: R, G, B */
+ float vec[MAX_VECT][MAX_COMP];
+ int i, j, k;
+ qword hi; /* high quadword */
+ dword lohi, lolo; /* low quadword: hi dword, lo dword */
+
+ if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
+ fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
+ }
+
+ Q_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
+ for (j = n_vect - 1; j >= 0; j--) {
+ for (i = 0; i < n_comp; i++) {
+ /* add in colors */
+ Q_SHL(hi, 5);
+ Q_OR32(hi, (dword)(vec[j][i] / 8.0F));
+ }
+ }
+ ((qword *)cc)[1] = hi;
+
+ lohi = lolo = 0;
+ /* right microtile */
+ for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
+ lohi <<= 2;
+ lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
+ }
+ /* left microtile */
+ for (; k >= 0; k--) {
+ lolo <<= 2;
+ lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
+ }
+ cc[1] = lohi;
+ cc[0] = lolo;
}
static void
fxt1_quantize_ALPHA0 (dword *cc,
- byte input[N_TEXELS][MAX_COMP],
- byte reord[N_TEXELS][MAX_COMP], int n)
+ byte input[N_TEXELS][MAX_COMP],
+ byte reord[N_TEXELS][MAX_COMP], int n)
{
- const int n_vect = 3; /* 3 base vectors to find */
- const int n_comp = 4; /* 4 components: R, G, B, A */
- float vec[MAX_VECT][MAX_COMP];
- int i, j, k;
- qword hi; /* high quadword */
- dword lohi, lolo; /* low quadword: hi dword, lo dword */
-
- /* the last vector indicates zero */
- for (i = 0; i < n_comp; i++) {
- vec[n_vect][i] = 0;
- }
-
- /* the first n texels in reord are guaranteed to be non-zero */
- if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
- fxt1_lloyd(vec, n_vect, reord, n_comp, n);
- }
-
- Q_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
- for (j = n_vect - 1; j >= 0; j--) {
- /* add in alphas */
- Q_SHL(hi, 5);
- Q_OR32(hi, (dword)(vec[j][ACOMP] / 8.0F));
- }
- for (j = n_vect - 1; j >= 0; j--) {
- for (i = 0; i < n_comp - 1; i++) {
- /* add in colors */
- Q_SHL(hi, 5);
- Q_OR32(hi, (dword)(vec[j][i] / 8.0F));
- }
- }
- ((qword *)cc)[1] = hi;
-
- lohi = lolo = 0;
- /* right microtile */
- for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
- lohi <<= 2;
- lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
- }
- /* left microtile */
- for (; k >= 0; k--) {
- lolo <<= 2;
- lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
- }
- cc[1] = lohi;
- cc[0] = lolo;
+ const int n_vect = 3; /* 3 base vectors to find */
+ const int n_comp = 4; /* 4 components: R, G, B, A */
+ float vec[MAX_VECT][MAX_COMP];
+ int i, j, k;
+ qword hi; /* high quadword */
+ dword lohi, lolo; /* low quadword: hi dword, lo dword */
+
+ /* the last vector indicates zero */
+ for (i = 0; i < n_comp; i++) {
+ vec[n_vect][i] = 0;
+ }
+
+ /* the first n texels in reord are guaranteed to be non-zero */
+ if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
+ fxt1_lloyd(vec, n_vect, reord, n_comp, n);
+ }
+
+ Q_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
+ for (j = n_vect - 1; j >= 0; j--) {
+ /* add in alphas */
+ Q_SHL(hi, 5);
+ Q_OR32(hi, (dword)(vec[j][ACOMP] / 8.0F));
+ }
+ for (j = n_vect - 1; j >= 0; j--) {
+ for (i = 0; i < n_comp - 1; i++) {
+ /* add in colors */
+ Q_SHL(hi, 5);
+ Q_OR32(hi, (dword)(vec[j][i] / 8.0F));
+ }
+ }
+ ((qword *)cc)[1] = hi;
+
+ lohi = lolo = 0;
+ /* right microtile */
+ for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
+ lohi <<= 2;
+ lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
+ }
+ /* left microtile */
+ for (; k >= 0; k--) {
+ lolo <<= 2;
+ lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
+ }
+ cc[1] = lohi;
+ cc[0] = lolo;
}
static void
fxt1_quantize_ALPHA1 (dword *cc,
- byte input[N_TEXELS][MAX_COMP])
+ byte input[N_TEXELS][MAX_COMP])
{
- const int n_vect = 3; /* highest vector number in each microtile */
- const int n_comp = 4; /* 4 components: R, G, B, A */
- float vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
- float b, iv[MAX_COMP]; /* interpolation vector */
- int i, j, k;
- qword hi; /* high quadword */
- dword lohi, lolo; /* low quadword: hi dword, lo dword */
-
- int minSum;
- int maxSum;
- int minColL = 0, maxColL = 0;
- int minColR = 0, maxColR = 0;
- int sumL = 0, sumR = 0;
-
- /* Our solution here is to find the darkest and brightest colors in
- * the 4x4 tile and use those as the two representative colors.
- * There are probably better algorithms to use (histogram-based).
- */
-#ifndef YUV
- minSum = 2000; /* big enough */
-#else
- minSum = 2000000;
-#endif
- maxSum = -1; /* small enough */
- for (k = 0; k < N_TEXELS / 2; k++) {
- int sum = 0;
-#ifndef YUV
- for (i = 0; i < n_comp; i++) {
- sum += input[k][i];
- }
-#else
- sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];
-#endif
- if (minSum > sum) {
- minSum = sum;
- minColL = k;
- }
- if (maxSum < sum) {
- maxSum = sum;
- maxColL = k;
- }
- sumL += sum;
- }
-#ifndef YUV
- minSum = 2000; /* big enough */
-#else
- minSum = 2000000;
-#endif
- maxSum = -1; /* small enough */
- for (; k < N_TEXELS; k++) {
- int sum = 0;
-#ifndef YUV
- for (i = 0; i < n_comp; i++) {
- sum += input[k][i];
- }
-#else
- sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];
-#endif
- if (minSum > sum) {
- minSum = sum;
- minColR = k;
- }
- if (maxSum < sum) {
- maxSum = sum;
- maxColR = k;
- }
- sumR += sum;
- }
-
- /* choose the common vector (yuck!) */
- {
- int j1, j2;
- int v1 = 0, v2 = 0;
- float err = 1e9; /* big enough */
- float tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
- for (i = 0; i < n_comp; i++) {
- tv[0][i] = input[minColL][i];
- tv[1][i] = input[maxColL][i];
- tv[2][i] = input[minColR][i];
- tv[3][i] = input[maxColR][i];
- }
- for (j1 = 0; j1 < 2; j1++) {
- for (j2 = 2; j2 < 4; j2++) {
- float e = 0.0F;
- for (i = 0; i < n_comp; i++) {
- e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
- }
- if (e < err) {
- err = e;
- v1 = j1;
- v2 = j2;
- }
- }
- }
- for (i = 0; i < n_comp; i++) {
- vec[0][i] = tv[1 - v1][i];
- vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
- vec[2][i] = tv[5 - v2][i];
- }
- }
-
- /* left microtile */
- cc[0] = 0;
- if (minColL != maxColL) {
- /* compute interpolation vector */
- MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
-
- /* add in texels */
- lolo = 0;
- for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
- int texel;
- /* interpolate color */
- CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
- /* add in texel */
- lolo <<= 2;
- lolo |= texel;
- }
-
- cc[0] = lolo;
- }
-
- /* right microtile */
- cc[1] = 0;
- if (minColR != maxColR) {
- /* compute interpolation vector */
- MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
-
- /* add in texels */
- lohi = 0;
- for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
- int texel;
- /* interpolate color */
- CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
- /* add in texel */
- lohi <<= 2;
- lohi |= texel;
- }
-
- cc[1] = lohi;
- }
-
- Q_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
- for (j = n_vect - 1; j >= 0; j--) {
- /* add in alphas */
- Q_SHL(hi, 5);
- Q_OR32(hi, (dword)(vec[j][ACOMP] / 8.0F));
- }
- for (j = n_vect - 1; j >= 0; j--) {
- for (i = 0; i < n_comp - 1; i++) {
- /* add in colors */
- Q_SHL(hi, 5);
- Q_OR32(hi, (dword)(vec[j][i] / 8.0F));
- }
- }
- ((qword *)cc)[1] = hi;
+ const int n_vect = 3; /* highest vector number in each microtile */
+ const int n_comp = 4; /* 4 components: R, G, B, A */
+ float vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
+ float b, iv[MAX_COMP]; /* interpolation vector */
+ int i, j, k;
+ qword hi; /* high quadword */
+ dword lohi, lolo; /* low quadword: hi dword, lo dword */
+
+ int minSum;
+ int maxSum;
+ int minColL = 0, maxColL = 0;
+ int minColR = 0, maxColR = 0;
+ int sumL = 0, sumR = 0;
+ int nn_comp;
+ /* Our solution here is to find the darkest and brightest colors in
+ * the 4x4 tile and use those as the two representative colors.
+ * There are probably better algorithms to use (histogram-based).
+ */
+ nn_comp = n_comp;
+ while ((minColL == maxColL) && nn_comp) {
+ minSum = 2000; /* big enough */
+ maxSum = -1; /* small enough */
+ for (k = 0; k < N_TEXELS / 2; k++) {
+ int sum = 0;
+ for (i = 0; i < nn_comp; i++) {
+ sum += input[k][i];
+ }
+ if (minSum > sum) {
+ minSum = sum;
+ minColL = k;
+ }
+ if (maxSum < sum) {
+ maxSum = sum;
+ maxColL = k;
+ }
+ sumL += sum;
+ }
+
+ nn_comp--;
+ }
+
+ nn_comp = n_comp;
+ while ((minColR == maxColR) && nn_comp) {
+ minSum = 2000; /* big enough */
+ maxSum = -1; /* small enough */
+ for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
+ int sum = 0;
+ for (i = 0; i < nn_comp; i++) {
+ sum += input[k][i];
+ }
+ if (minSum > sum) {
+ minSum = sum;
+ minColR = k;
+ }
+ if (maxSum < sum) {
+ maxSum = sum;
+ maxColR = k;
+ }
+ sumR += sum;
+ }
+
+ nn_comp--;
+ }
+
+ /* choose the common vector (yuck!) */
+ {
+ int j1, j2;
+ int v1 = 0, v2 = 0;
+ float err = 1e9; /* big enough */
+ float tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
+ for (i = 0; i < n_comp; i++) {
+ tv[0][i] = input[minColL][i];
+ tv[1][i] = input[maxColL][i];
+ tv[2][i] = input[minColR][i];
+ tv[3][i] = input[maxColR][i];
+ }
+ for (j1 = 0; j1 < 2; j1++) {
+ for (j2 = 2; j2 < 4; j2++) {
+ float e = 0.0F;
+ for (i = 0; i < n_comp; i++) {
+ e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
+ }
+ if (e < err) {
+ err = e;
+ v1 = j1;
+ v2 = j2;
+ }
+ }
+ }
+ for (i = 0; i < n_comp; i++) {
+ vec[0][i] = tv[1 - v1][i];
+ vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
+ vec[2][i] = tv[5 - v2][i];
+ }
+ }
+
+ /* left microtile */
+ cc[0] = 0;
+ if (minColL != maxColL) {
+ /* compute interpolation vector */
+ MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
+
+ /* add in texels */
+ lolo = 0;
+ for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
+ int texel;
+ /* interpolate color */
+ CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
+ /* add in texel */
+ lolo <<= 2;
+ lolo |= texel;
+ }
+
+ cc[0] = lolo;
+ }
+
+ /* right microtile */
+ cc[1] = 0;
+ if (minColR != maxColR) {
+ /* compute interpolation vector */
+ MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
+
+ /* add in texels */
+ lohi = 0;
+ for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
+ int texel;
+ /* interpolate color */
+ CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
+ /* add in texel */
+ lohi <<= 2;
+ lohi |= texel;
+ }
+
+ cc[1] = lohi;
+ }
+
+ Q_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
+ for (j = n_vect - 1; j >= 0; j--) {
+ /* add in alphas */
+ Q_SHL(hi, 5);
+ Q_OR32(hi, (dword)(vec[j][ACOMP] / 8.0F));
+ }
+ for (j = n_vect - 1; j >= 0; j--) {
+ for (i = 0; i < n_comp - 1; i++) {
+ /* add in colors */
+ Q_SHL(hi, 5);
+ Q_OR32(hi, (dword)(vec[j][i] / 8.0F));
+ }
+ }
+ ((qword *)cc)[1] = hi;
}
static void
fxt1_quantize_HI (dword *cc,
- byte input[N_TEXELS][MAX_COMP],
- byte reord[N_TEXELS][MAX_COMP], int n)
+ byte input[N_TEXELS][MAX_COMP],
+ byte reord[N_TEXELS][MAX_COMP], int n)
{
- const int n_vect = 6; /* highest vector number */
- const int n_comp = 3; /* 3 components: R, G, B */
- float b = 0.0F; /* phoudoin: silent compiler! */
- float iv[MAX_COMP]; /* interpolation vector */
- int i, k;
- dword hihi; /* high quadword: hi dword */
-
-#ifndef YUV
- int minSum = 2000; /* big enough */
-#else
- int minSum = 2000000;
-#endif
- int maxSum = -1; /* small enough */
- int minCol = 0; /* phoudoin: silent compiler! */
- int maxCol = 0; /* phoudoin: silent compiler! */
-
- /* Our solution here is to find the darkest and brightest colors in
- * the 8x4 tile and use those as the two representative colors.
- * There are probably better algorithms to use (histogram-based).
- */
- for (k = 0; k < n; k++) {
- int sum = 0;
-#ifndef YUV
- for (i = 0; i < n_comp; i++) {
- sum += reord[k][i];
- }
-#else
- sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];
-#endif
- if (minSum > sum) {
- minSum = sum;
- minCol = k;
- }
- if (maxSum < sum) {
- maxSum = sum;
- maxCol = k;
- }
- }
-
- hihi = 0; /* cc-hi = "00" */
- for (i = 0; i < n_comp; i++) {
- /* add in colors */
- hihi <<= 5;
- hihi |= reord[maxCol][i] >> 3;
- }
- for (i = 0; i < n_comp; i++) {
- /* add in colors */
- hihi <<= 5;
- hihi |= reord[minCol][i] >> 3;
- }
- cc[3] = hihi;
- cc[0] = cc[1] = cc[2] = 0;
-
- /* compute interpolation vector */
- if (minCol != maxCol) {
- MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
- }
-
- /* add in texels */
- for (k = N_TEXELS - 1; k >= 0; k--) {
- int t = k * 3;
- dword *kk = (dword *)((byte *)cc + t / 8);
- int texel = n_vect + 1; /* transparent black */
-
- if (!ISTBLACK(input[k])) {
- if (minCol != maxCol) {
- /* interpolate color */
- CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
- /* add in texel */
- kk[0] |= texel << (t & 7);
- }
- } else {
- /* add in texel */
- kk[0] |= texel << (t & 7);
- }
- }
+ const int n_vect = 6; /* highest vector number */
+ const int n_comp = 3; /* 3 components: R, G, B */
+ float b = 0.0F; /* phoudoin: silent compiler! */
+ float iv[MAX_COMP]; /* interpolation vector */
+ int i, k;
+ dword hihi; /* high quadword: hi dword */
+
+ int minSum = 2000; /* big enough */
+ int maxSum = -1; /* small enough */
+ int minCol = 0; /* phoudoin: silent compiler! */
+ int maxCol = 0; /* phoudoin: silent compiler! */
+
+ /* Our solution here is to find the darkest and brightest colors in
+ * the 8x4 tile and use those as the two representative colors.
+ * There are probably better algorithms to use (histogram-based).
+ */
+ for (k = 0; k < n; k++) {
+ int sum = 0;
+ for (i = 0; i < n_comp; i++) {
+ sum += reord[k][i];
+ }
+ if (minSum > sum) {
+ minSum = sum;
+ minCol = k;
+ }
+ if (maxSum < sum) {
+ maxSum = sum;
+ maxCol = k;
+ }
+ }
+
+ hihi = 0; /* cc-hi = "00" */
+ for (i = 0; i < n_comp; i++) {
+ /* add in colors */
+ hihi <<= 5;
+ hihi |= reord[maxCol][i] >> 3;
+ }
+ for (i = 0; i < n_comp; i++) {
+ /* add in colors */
+ hihi <<= 5;
+ hihi |= reord[minCol][i] >> 3;
+ }
+ cc[3] = hihi;
+ cc[0] = cc[1] = cc[2] = 0;
+
+ /* compute interpolation vector */
+ if (minCol != maxCol) {
+ MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
+ }
+
+ /* add in texels */
+ for (k = N_TEXELS - 1; k >= 0; k--) {
+ int t = k * 3;
+ dword *kk = (dword *)((char *)cc + t / 8);
+ int texel = n_vect + 1; /* transparent black */
+
+ if (!ISTBLACK(input[k])) {
+ if (minCol != maxCol) {
+ /* interpolate color */
+ CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
+ /* add in texel */
+ kk[0] |= texel << (t & 7);
+ }
+ } else {
+ /* add in texel */
+ kk[0] |= texel << (t & 7);
+ }
+ }
}
static void
fxt1_quantize_MIXED1 (dword *cc,
- byte input[N_TEXELS][MAX_COMP])
+ byte input[N_TEXELS][MAX_COMP])
{
- const int n_vect = 2; /* highest vector number in each microtile */
- const int n_comp = 3; /* 3 components: R, G, B */
- byte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
- float b, iv[MAX_COMP]; /* interpolation vector */
- int i, j, k;
- qword hi; /* high quadword */
- dword lohi, lolo; /* low quadword: hi dword, lo dword */
-
- int minSum;
- int maxSum;
- int minColL = 0, maxColL = -1;
- int minColR = 0, maxColR = -1;
-
- /* Our solution here is to find the darkest and brightest colors in
- * the 4x4 tile and use those as the two representative colors.
- * There are probably better algorithms to use (histogram-based).
- */
-#ifndef YUV
- minSum = 2000; /* big enough */
-#else
- minSum = 2000000;
-#endif
- maxSum = -1; /* small enough */
- for (k = 0; k < N_TEXELS / 2; k++) {
- if (!ISTBLACK(input[k])) {
- int sum = 0;
-#ifndef YUV
- for (i = 0; i < n_comp; i++) {
- sum += input[k][i];
- }
-#else
- sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];
-#endif
- if (minSum > sum) {
- minSum = sum;
- minColL = k;
- }
- if (maxSum < sum) {
- maxSum = sum;
- maxColL = k;
- }
- }
- }
-#ifndef YUV
- minSum = 2000; /* big enough */
-#else
- minSum = 2000000;
-#endif
- maxSum = -1; /* small enough */
- for (; k < N_TEXELS; k++) {
- if (!ISTBLACK(input[k])) {
- int sum = 0;
-#ifndef YUV
- for (i = 0; i < n_comp; i++) {
- sum += input[k][i];
- }
-#else
- sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];
-#endif
- if (minSum > sum) {
- minSum = sum;
- minColR = k;
- }
- if (maxSum < sum) {
- maxSum = sum;
- maxColR = k;
- }
- }
- }
-
- /* left microtile */
- if (maxColL == -1) {
- /* all transparent black */
- cc[0] = 0xFFFFFFFF;
- for (i = 0; i < n_comp; i++) {
- vec[0][i] = 0;
- vec[1][i] = 0;
- }
- } else {
- cc[0] = 0;
- for (i = 0; i < n_comp; i++) {
- vec[0][i] = input[minColL][i];
- vec[1][i] = input[maxColL][i];
- }
- if (minColL != maxColL) {
- /* compute interpolation vector */
- MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
-
- /* add in texels */
- lolo = 0;
- for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
- int texel = n_vect + 1; /* transparent black */
- if (!ISTBLACK(input[k])) {
- /* interpolate color */
- CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
- }
- /* add in texel */
- lolo <<= 2;
- lolo |= texel;
- }
- cc[0] = lolo;
- }
- }
-
- /* right microtile */
- if (maxColR == -1) {
- /* all transparent black */
- cc[1] = 0xFFFFFFFF;
- for (i = 0; i < n_comp; i++) {
- vec[2][i] = 0;
- vec[3][i] = 0;
- }
- } else {
- cc[1] = 0;
- for (i = 0; i < n_comp; i++) {
- vec[2][i] = input[minColR][i];
- vec[3][i] = input[maxColR][i];
- }
- if (minColR != maxColR) {
- /* compute interpolation vector */
- MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
-
- /* add in texels */
- lohi = 0;
- for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
- int texel = n_vect + 1; /* transparent black */
- if (!ISTBLACK(input[k])) {
- /* interpolate color */
- CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
- }
- /* add in texel */
- lohi <<= 2;
- lohi |= texel;
- }
- cc[1] = lohi;
- }
- }
-
- Q_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
- for (j = 2 * 2 - 1; j >= 0; j--) {
- for (i = 0; i < n_comp; i++) {
- /* add in colors */
- Q_SHL(hi, 5);
- Q_OR32(hi, vec[j][i] >> 3);
- }
- }
- ((qword *)cc)[1] = hi;
+ const int n_vect = 2; /* highest vector number in each microtile */
+ const int n_comp = 3; /* 3 components: R, G, B */
+ byte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
+ float b, iv[MAX_COMP]; /* interpolation vector */
+ int i, j, k;
+ qword hi; /* high quadword */
+ dword lohi, lolo; /* low quadword: hi dword, lo dword */
+
+ int minSum;
+ int maxSum;
+ int minColL = 0, maxColL = -1;
+ int minColR = 0, maxColR = -1;
+
+ /* Our solution here is to find the darkest and brightest colors in
+ * the 4x4 tile and use those as the two representative colors.
+ * There are probably better algorithms to use (histogram-based).
+ */
+ minSum = 2000; /* big enough */
+ maxSum = -1; /* small enough */
+ for (k = 0; k < N_TEXELS / 2; k++) {
+ if (!ISTBLACK(input[k])) {
+ int sum = 0;
+ for (i = 0; i < n_comp; i++) {
+ sum += input[k][i];
+ }
+ if (minSum > sum) {
+ minSum = sum;
+ minColL = k;
+ }
+ if (maxSum < sum) {
+ maxSum = sum;
+ maxColL = k;
+ }
+ }
+ }
+ minSum = 2000; /* big enough */
+ maxSum = -1; /* small enough */
+ for (; k < N_TEXELS; k++) {
+ if (!ISTBLACK(input[k])) {
+ int sum = 0;
+ for (i = 0; i < n_comp; i++) {
+ sum += input[k][i];
+ }
+ if (minSum > sum) {
+ minSum = sum;
+ minColR = k;
+ }
+ if (maxSum < sum) {
+ maxSum = sum;
+ maxColR = k;
+ }
+ }
+ }
+
+ /* left microtile */
+ if (maxColL == -1) {
+ /* all transparent black */
+ cc[0] = ~0u;
+ for (i = 0; i < n_comp; i++) {
+ vec[0][i] = 0;
+ vec[1][i] = 0;
+ }
+ } else {
+ cc[0] = 0;
+ for (i = 0; i < n_comp; i++) {
+ vec[0][i] = input[minColL][i];
+ vec[1][i] = input[maxColL][i];
+ }
+ if (minColL != maxColL) {
+ /* compute interpolation vector */
+ MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
+
+ /* add in texels */
+ lolo = 0;
+ for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
+ int texel = n_vect + 1; /* transparent black */
+ if (!ISTBLACK(input[k])) {
+ /* interpolate color */
+ CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
+ }
+ /* add in texel */
+ lolo <<= 2;
+ lolo |= texel;
+ }
+ cc[0] = lolo;
+ }
+ }
+
+ /* right microtile */
+ if (maxColR == -1) {
+ /* all transparent black */
+ cc[1] = ~0u;
+ for (i = 0; i < n_comp; i++) {
+ vec[2][i] = 0;
+ vec[3][i] = 0;
+ }
+ } else {
+ cc[1] = 0;
+ for (i = 0; i < n_comp; i++) {
+ vec[2][i] = input[minColR][i];
+ vec[3][i] = input[maxColR][i];
+ }
+ if (minColR != maxColR) {
+ /* compute interpolation vector */
+ MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
+
+ /* add in texels */
+ lohi = 0;
+ for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
+ int texel = n_vect + 1; /* transparent black */
+ if (!ISTBLACK(input[k])) {
+ /* interpolate color */
+ CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
+ }
+ /* add in texel */
+ lohi <<= 2;
+ lohi |= texel;
+ }
+ cc[1] = lohi;
+ }
+ }
+
+ Q_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
+ for (j = 2 * 2 - 1; j >= 0; j--) {
+ for (i = 0; i < n_comp; i++) {
+ /* add in colors */
+ Q_SHL(hi, 5);
+ Q_OR32(hi, vec[j][i] >> 3);
+ }
+ }
+ ((qword *)cc)[1] = hi;
}
static void
fxt1_quantize_MIXED0 (dword *cc,
- byte input[N_TEXELS][MAX_COMP])
+ byte input[N_TEXELS][MAX_COMP])
{
- const int n_vect = 3; /* highest vector number in each microtile */
- const int n_comp = 3; /* 3 components: R, G, B */
- byte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
- float b, iv[MAX_COMP]; /* interpolation vector */
- int i, j, k;
- qword hi; /* high quadword */
- dword lohi, lolo; /* low quadword: hi dword, lo dword */
-
- int minColL = 0, maxColL = 0;
- int minColR = 0, maxColR = 0;
+ const int n_vect = 3; /* highest vector number in each microtile */
+ const int n_comp = 3; /* 3 components: R, G, B */
+ byte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
+ float b, iv[MAX_COMP]; /* interpolation vector */
+ int i, j, k;
+ qword hi; /* high quadword */
+ dword lohi, lolo; /* low quadword: hi dword, lo dword */
+
+ int minColL = 0, maxColL = 0; /* texel indices of the two representative colors, left microtile (texels 0 .. N_TEXELS/2 - 1) */
+ int minColR = 0, maxColR = 0; /* same for the right microtile (texels N_TEXELS/2 .. N_TEXELS - 1) */
#if 0
- int minSum;
- int maxSum;
-
- /* Our solution here is to find the darkest and brightest colors in
- * the 4x4 tile and use those as the two representative colors.
- * There are probably better algorithms to use (histogram-based).
- */
-#ifndef YUV
- minSum = 2000; /* big enough */
-#else
- minSum = 2000000;
-#endif
- maxSum = -1; /* small enough */
- for (k = 0; k < N_TEXELS / 2; k++) {
- int sum = 0;
-#ifndef YUV
- for (i = 0; i < n_comp; i++) {
- sum += input[k][i];
- }
-#else
- sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];
-#endif
- if (minSum > sum) {
- minSum = sum;
- minColL = k;
- }
- if (maxSum < sum) {
- maxSum = sum;
- maxColL = k;
- }
- }
- minSum = 2000; /* big enough */
- maxSum = -1; /* small enough */
- for (; k < N_TEXELS; k++) {
- int sum = 0;
-#ifndef YUV
- for (i = 0; i < n_comp; i++) {
- sum += input[k][i];
- }
+ int minSum;
+ int maxSum;
+
+ /* Our solution here is to find the darkest and brightest colors in
+ * the 4x4 tile and use those as the two representative colors.
+ * There are probably better algorithms to use (histogram-based).
+ */
+ minSum = 2000; /* big enough */
+ maxSum = -1; /* small enough */
+ for (k = 0; k < N_TEXELS / 2; k++) {
+ int sum = 0;
+ for (i = 0; i < n_comp; i++) {
+ sum += input[k][i];
+ }
+ if (minSum > sum) {
+ minSum = sum;
+ minColL = k;
+ }
+ if (maxSum < sum) {
+ maxSum = sum;
+ maxColL = k;
+ }
+ }
+ minSum = 2000; /* big enough */
+ maxSum = -1; /* small enough */
+ for (; k < N_TEXELS; k++) {
+ int sum = 0;
+ for (i = 0; i < n_comp; i++) {
+ sum += input[k][i];
+ }
+ if (minSum > sum) {
+ minSum = sum;
+ minColR = k;
+ }
+ if (maxSum < sum) {
+ maxSum = sum;
+ maxColR = k;
+ }
+ }
#else
- sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];
-#endif
- if (minSum > sum) {
- minSum = sum;
- minColR = k;
- }
- if (maxSum < sum) {
- maxSum = sum;
- maxColR = k;
- }
- }
-#else
- int minVal;
- int maxVal;
- int maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
- int maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
-
- /* Scan the channel with max variance for lo & hi
- * and use those as the two representative colors.
- */
- minVal = 2000; /* big enough */
- maxVal = -1; /* small enough */
- for (k = 0; k < N_TEXELS / 2; k++) {
- int t = input[k][maxVarL];
- if (minVal > t) {
- minVal = t;
- minColL = k;
- }
- if (maxVal < t) {
- maxVal = t;
- maxColL = k;
- }
- }
- minVal = 2000; /* big enough */
- maxVal = -1; /* small enough */
- for (; k < N_TEXELS; k++) {
- int t = input[k][maxVarR];
- if (minVal > t) {
- minVal = t;
- minColR = k;
- }
- if (maxVal < t) {
- maxVal = t;
- maxColR = k;
- }
- }
+ int minVal;
+ int maxVal;
+ int maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2); /* used below as the channel index scanned for the left half */
+ int maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2); /* channel index scanned for the right half */
+
+ /* Scan the channel with max variance for lo & hi
+ * and use those as the two representative colors.
+ */
+ minVal = 2000; /* big enough */
+ maxVal = -1; /* small enough */
+ for (k = 0; k < N_TEXELS / 2; k++) {
+ int t = input[k][maxVarL];
+ if (minVal > t) {
+ minVal = t;
+ minColL = k;
+ }
+ if (maxVal < t) {
+ maxVal = t;
+ maxColL = k;
+ }
+ }
+ minVal = 2000; /* big enough */
+ maxVal = -1; /* small enough */
+ for (; k < N_TEXELS; k++) {
+ int t = input[k][maxVarR];
+ if (minVal > t) {
+ minVal = t;
+ minColR = k;
+ }
+ if (maxVal < t) {
+ maxVal = t;
+ maxColR = k;
+ }
+ }
#endif
- /* left microtile */
- cc[0] = 0;
- for (i = 0; i < n_comp; i++) {
- vec[0][i] = input[minColL][i];
- vec[1][i] = input[maxColL][i];
- }
- if (minColL != maxColL) {
- /* compute interpolation vector */
- MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
-
- /* add in texels */
- lolo = 0;
- for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
- int texel;
- /* interpolate color */
- CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
- /* add in texel */
- lolo <<= 2;
- lolo |= texel;
- }
-
- /* funky encoding for LSB of green */
- if ((int)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
- for (i = 0; i < n_comp; i++) {
- vec[1][i] = input[minColL][i];
- vec[0][i] = input[maxColL][i];
- }
- lolo = ~lolo;
- }
-
- cc[0] = lolo;
- }
-
- /* right microtile */
- cc[1] = 0;
- for (i = 0; i < n_comp; i++) {
- vec[2][i] = input[minColR][i];
- vec[3][i] = input[maxColR][i];
- }
- if (minColR != maxColR) {
- /* compute interpolation vector */
- MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
-
- /* add in texels */
- lohi = 0;
- for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
- int texel;
- /* interpolate color */
- CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
- /* add in texel */
- lohi <<= 2;
- lohi |= texel;
- }
-
- /* funky encoding for LSB of green */
- if ((int)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
- for (i = 0; i < n_comp; i++) {
- vec[3][i] = input[minColR][i];
- vec[2][i] = input[maxColR][i];
- }
- lohi = ~lohi;
- }
-
- cc[1] = lohi;
- }
-
- Q_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
- for (j = 2 * 2 - 1; j >= 0; j--) {
- for (i = 0; i < n_comp; i++) {
- /* add in colors */
- Q_SHL(hi, 5);
- Q_OR32(hi, vec[j][i] >> 3);
- }
- }
- ((qword *)cc)[1] = hi;
+ /* left microtile */
+ cc[0] = 0; /* selectors stay 0 when both representatives are the same texel */
+ for (i = 0; i < n_comp; i++) {
+ vec[0][i] = input[minColL][i];
+ vec[1][i] = input[maxColL][i];
+ }
+ if (minColL != maxColL) {
+ /* compute interpolation vector */
+ MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
+
+ /* add in texels */
+ lolo = 0;
+ for (k = N_TEXELS / 2 - 1; k >= 0; k--) { /* walk backwards so texel 0 ends up in the lowest two bits */
+ int texel;
+ /* interpolate color */
+ CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
+ /* add in texel */
+ lolo <<= 2;
+ lolo |= texel;
+ }
+
+ /* funky encoding for LSB of green */
+ if ((int)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
+ for (i = 0; i < n_comp; i++) {
+ vec[1][i] = input[minColL][i];
+ vec[0][i] = input[maxColL][i];
+ }
+ lolo = ~lolo; /* endpoints were swapped just above, so flip every selector bit to match */
+ }
+
+ cc[0] = lolo;
+ }
+
+ /* right microtile */
+ cc[1] = 0; /* selectors stay 0 when both representatives are the same texel */
+ for (i = 0; i < n_comp; i++) {
+ vec[2][i] = input[minColR][i];
+ vec[3][i] = input[maxColR][i];
+ }
+ if (minColR != maxColR) {
+ /* compute interpolation vector */
+ MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
+
+ /* add in texels */
+ lohi = 0;
+ for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) { /* walk backwards so texel N_TEXELS/2 ends up in the lowest two bits */
+ int texel;
+ /* interpolate color */
+ CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
+ /* add in texel */
+ lohi <<= 2;
+ lohi |= texel;
+ }
+
+ /* funky encoding for LSB of green */
+ if ((int)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
+ for (i = 0; i < n_comp; i++) {
+ vec[3][i] = input[minColR][i];
+ vec[2][i] = input[maxColR][i];
+ }
+ lohi = ~lohi; /* endpoints were swapped just above, so flip every selector bit to match */
+ }
+
+ cc[1] = lohi;
+ }
+
+ Q_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
+ for (j = 2 * 2 - 1; j >= 0; j--) { /* vec[3] is shifted in first, vec[0] last */
+ for (i = 0; i < n_comp; i++) {
+ /* add in colors */
+ Q_SHL(hi, 5);
+ Q_OR32(hi, vec[j][i] >> 3); /* keep the top 5 bits of each 8-bit channel */
+ }
+ }
+ ((qword *)cc)[1] = hi; /* second qword of the block, i.e. the bytes after cc[0] and cc[1] if dword is 32 bits wide */
}
static void
fxt1_quantize (dword *cc, const byte *lines[], int comps)
{
- int trualpha;
- byte reord[N_TEXELS][MAX_COMP];
+ int trualpha; /* set when a texel that is not transparent black has alpha below 255 - ALPHA_TS */
+ byte reord[N_TEXELS][MAX_COMP]; /* compacted copy of the texels that are not transparent black */
+
+ byte input[N_TEXELS][MAX_COMP]; /* texels regrouped into two halves of 16 (see block layout below) */
+ int i, k, l;
+
+ if (comps == 3) {
+ /* make the whole block opaque */
+ memset(input, -1, sizeof(input)); /* every byte becomes 0xFF; bytes not overwritten below keep that value */
+ }
+
+ /* 8 texels each line */
+ for (l = 0; l < 4; l++) {
+ for (k = 0; k < 4; k++) {
+ for (i = 0; i < comps; i++) {
+ input[k + l * 4][i] = *lines[l]++; /* first four texels of line l go to input[0..15] */
+ }
+ }
+ for (; k < 8; k++) {
+ for (i = 0; i < comps; i++) {
+ input[k + l * 4 + 12][i] = *lines[l]++; /* last four texels of line l go to input[16..31] */
+ }
+ }
+ }
+
+ /* block layout:
+ * 00, 01, 02, 03, 08, 09, 0a, 0b
+ * 10, 11, 12, 13, 18, 19, 1a, 1b
+ * 04, 05, 06, 07, 0c, 0d, 0e, 0f
+ * 14, 15, 16, 17, 1c, 1d, 1e, 1f
+ */
+
+ /* [dBorca]
+ * stupidity flows forth from this
+ */
+ l = N_TEXELS; /* from here on l counts texels that are not transparent black; 3-component input keeps the full count */
+ trualpha = 0;
+ if (comps == 4) {
+ /* skip all transparent black texels */
+ l = 0;
+ for (k = 0; k < N_TEXELS; k++) {
+ /* test all components against 0 */
+ if (!ISTBLACK(input[k])) {
+ /* texel is not transparent black */
+ COPY_4UBV(reord[l], input[k]);
+ if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
+ /* non-opaque texel */
+ trualpha = !0;
+ }
+ l++;
+ }
+ }
+ }
- byte input[N_TEXELS][MAX_COMP];
-#ifndef ARGB
- int i;
-#endif
- int k, l;
-
- if (comps == 3) {
- /* make the whole block opaque */
- memset(input, -1, sizeof(input));
- }
-
- /* 8 texels each line */
-#ifndef ARGB
- for (l = 0; l < 4; l++) {
- for (k = 0; k < 4; k++) {
- for (i = 0; i < comps; i++) {
- input[k + l * 4][i] = *lines[l]++;
- }
- }
- for (; k < 8; k++) {
- for (i = 0; i < comps; i++) {
- input[k + l * 4 + 12][i] = *lines[l]++;
- }
- }
- }
+#if 0
+ if (trualpha) {
+ fxt1_quantize_ALPHA0(cc, input, reord, l);
+ } else if (l == 0) {
+ cc[0] = cc[1] = cc[2] = -1;
+ cc[3] = 0;
+ } else if (l < N_TEXELS) {
+ fxt1_quantize_HI(cc, input, reord, l);
+ } else {
+ fxt1_quantize_CHROMA(cc, input);
+ }
+ (void)fxt1_quantize_ALPHA1;
+ (void)fxt1_quantize_MIXED1;
+ (void)fxt1_quantize_MIXED0;
#else
- /* H.Morii - support for ARGB inputs */
- for (l = 0; l < 4; l++) {
- for (k = 0; k < 4; k++) {
- input[k + l * 4][2] = *lines[l]++;
- input[k + l * 4][1] = *lines[l]++;
- input[k + l * 4][0] = *lines[l]++;
- if (comps == 4) input[k + l * 4][3] = *lines[l]++;
- }
- for (; k < 8; k++) {
- input[k + l * 4 + 12][2] = *lines[l]++;
- input[k + l * 4 + 12][1] = *lines[l]++;
- input[k + l * 4 + 12][0] = *lines[l]++;
- if (comps == 4) input[k + l * 4 + 12][3] = *lines[l]++;
- }
- }
+ if (trualpha) { /* at least one translucent texel */
+ fxt1_quantize_ALPHA1(cc, input);
+ } else if (l == 0) { /* every texel is transparent black: emit a fixed block */
+ cc[0] = cc[1] = cc[2] = ~0u;
+ cc[3] = 0;
+ } else if (l < N_TEXELS) { /* some, but not all, texels are transparent black */
+ fxt1_quantize_MIXED1(cc, input);
+ } else { /* no transparent black texel, nothing translucent */
+ fxt1_quantize_MIXED0(cc, input);
+ }
+ (void)fxt1_quantize_ALPHA0; /* these three are not called on this path; presumably referenced to silence unused-function warnings */
+ (void)fxt1_quantize_HI;
+ (void)fxt1_quantize_CHROMA;
#endif
+}
- /* block layout:
- * 00, 01, 02, 03, 08, 09, 0a, 0b
- * 10, 11, 12, 13, 18, 19, 1a, 1b
- * 04, 05, 06, 07, 0c, 0d, 0e, 0f
- * 14, 15, 16, 17, 1c, 1d, 1e, 1f
- */
-
- /* [dBorca]
- * stupidity flows forth from this
- */
- l = N_TEXELS;
- trualpha = 0;
- if (comps == 4) {
- /* skip all transparent black texels */
- l = 0;
- for (k = 0; k < N_TEXELS; k++) {
- /* test all components against 0 */
- if (!ISTBLACK(input[k])) {
- /* texel is not transparent black */
- COPY_4UBV(reord[l], input[k]);
- if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
- /* non-opaque texel */
- trualpha = !0;
- }
- l++;
- }
- }
- }
+
+/**
+ * Upscale an image by replication, not (typical) stretching.
+ * We use this when the image width or height is less than a
+ * certain size (4, 8) and we need to upscale an image.
+ *
+ * Destination texel (i, j) is copied from source texel
+ * (i % inHeight, j % inWidth), so the input is tiled over the output.
+ * Source rows start srcRowStride bytes apart; dest is written tightly
+ * packed with outWidth * comps bytes per row.
+ * inWidth and inHeight are used as modulo divisors and must be non-zero.
+ */
+static void
+upscale_teximage2d(int inWidth, int inHeight,
+ int outWidth, int outHeight,
+ int comps, const byte *src, int srcRowStride,
+ byte *dest )
+{
+ int i, j, k; /* output row, output column, component */
+
+ assert(outWidth >= inWidth);
+ assert(outHeight >= inHeight);
#if 0
- if (trualpha) {
- fxt1_quantize_ALPHA0(cc, input, reord, l);
- } else if (l == 0) {
- cc[0] = cc[1] = cc[2] = -1;
- cc[3] = 0;
- } else if (l < N_TEXELS) {
- fxt1_quantize_HI(cc, input, reord, l);
- } else {
- fxt1_quantize_CHROMA(cc, input);
- }
- (void)fxt1_quantize_ALPHA1;
- (void)fxt1_quantize_MIXED1;
- (void)fxt1_quantize_MIXED0;
-#else
- if (trualpha) {
- fxt1_quantize_ALPHA1(cc, input);
- } else if (l == 0) {
- cc[0] = cc[1] = cc[2] = 0xFFFFFFFF;
- cc[3] = 0;
- } else if (l < N_TEXELS) {
- fxt1_quantize_MIXED1(cc, input);
- } else {
- fxt1_quantize_MIXED0(cc, input);
- }
- (void)fxt1_quantize_ALPHA0;
- (void)fxt1_quantize_HI;
- (void)fxt1_quantize_CHROMA;
+ ASSERT(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
+ ASSERT((outWidth & 3) == 0);
+ ASSERT((outHeight & 3) == 0);
#endif
-}
+ for (i = 0; i < outHeight; i++) {
+ const int ii = i % inHeight; /* source row, wrapping around */
+ for (j = 0; j < outWidth; j++) {
+ const int jj = j % inWidth; /* source column, wrapping around */
+ for (k = 0; k < comps; k++) {
+ dest[(i * outWidth + j) * comps + k]
+ = src[ii * srcRowStride + jj * comps + k];
+ }
+ }
+ }
+}
-TAPI int TAPIENTRY
-fxt1_encode (int width, int height, int comps,
- const void *source, int srcRowStride,
- void *dest, int destRowStride)
+/**
+ * Encode an image into FXT1 blocks.
+ *
+ * \param width, height  image size in texels; when width is not a multiple
+ *                       of 8 or height is not a multiple of 4 the image is
+ *                       replicated up to the next such size before encoding
+ * \param comps          components per texel, must be 3 or 4 (asserted)
+ * \param source         input texels; only read, the work is done on a copy
+ *                       returned by reorder_source_3_alloc/reorder_source_4_alloc
+ * \param srcRowStride   byte distance between consecutive source rows
+ * \param dest           output; each 8x4 texel block produces 4 dwords
+ * \param destRowStride  byte distance between consecutive rows of blocks in
+ *                       dest (a row of blocks itself occupies width * 2 bytes)
+ *
+ * Nothing is returned. If a temporary buffer cannot be allocated the
+ * function returns without writing to dest.
+ */
+TAPI void TAPIENTRY
+fxt1_encode (dword width, dword height, int comps,
+             const void *source, int srcRowStride,
+             void *dest, int destRowStride)
{
- int x, y;
- const byte *data;
- dword *encoded = (dword *)dest;
- void *newSource = NULL;
-
- /* Replicate image if width is not M8 or height is not M4 */
- if ((width & 7) | (height & 3)) {
- int newWidth = (width + 7) & ~7;
- int newHeight = (height + 3) & ~3;
- newSource = malloc(comps * newWidth * newHeight * sizeof(byte *));
- _mesa_upscale_teximage2d(width, height, newWidth, newHeight,
- comps, (const byte *)source,
- srcRowStride, (byte *)newSource);
- source = newSource;
- width = newWidth;
- height = newHeight;
- srcRowStride = comps * newWidth;
- }
-
- data = (const byte *)source;
- destRowStride = (destRowStride - width * 2) / 4;
- for (y = 0; y < height; y += 4) {
- unsigned int offs = 0 + (y + 0) * srcRowStride;
- for (x = 0; x < width; x += 8) {
- const byte *lines[4];
- lines[0] = &data[offs];
- lines[1] = lines[0] + srcRowStride;
- lines[2] = lines[1] + srcRowStride;
- lines[3] = lines[2] + srcRowStride;
- offs += 8 * comps;
- fxt1_quantize(encoded, lines, comps);
- /* 128 bits per 8x4 block */
- encoded += 4;
- }
- encoded += destRowStride;
- }
-
- if (newSource != NULL) {
- free(newSource);
- }
-
- return 0;
+   dword x, y;
+   const byte *data;
+   dword *encoded = (dword *)dest;
+   void *newSource = NULL, *newSourcetmp = NULL;
+
+   assert(comps == 3 || comps == 4);
+
+   /* the reordered copy is owned by newSource and released at cleanUp */
+   if (comps == 3)
+      newSource = reorder_source_3_alloc(source, width, height, srcRowStride);
+   if (comps == 4)
+      newSource = reorder_source_4_alloc(source, width, height, srcRowStride);
+   if (!newSource)
+      goto cleanUp;
+   source = newSource;
+
+   /* Replicate image if width is not M8 or height is not M4 */
+   if ((width & 7) | (height & 3)) {
+      int newWidth = (width + 7) & ~7;
+      int newHeight = (height + 3) & ~3;
+      newSourcetmp = malloc(comps * newWidth * newHeight * sizeof(byte));
+      if (!newSourcetmp) {
+         /* newSource still owns the reordered copy; cleanUp releases it */
+         goto cleanUp;
+      }
+      /* source aliases newSource at this point, so the reordered copy must
+       * stay allocated until the replication has finished reading from it */
+      upscale_teximage2d(width, height, newWidth, newHeight,
+                         comps, (const byte *) source,
+                         srcRowStride, (byte *) newSourcetmp);
+      free(newSource);
+      newSource = newSourcetmp;
+      source = newSource;
+      width = newWidth;
+      height = newHeight;
+      srcRowStride = comps * newWidth;
+   }
+
+   data = (const byte *) source;
+   /* dwords to skip after each row of blocks (a row emits width * 2 bytes) */
+   destRowStride = (destRowStride - width * 2) / 4;
+   for (y = 0; y < height; y += 4) {
+      dword offs = 0 + (y + 0) * srcRowStride;
+      for (x = 0; x < width; x += 8) {
+         const byte *lines[4];
+         lines[0] = &data[offs];
+         lines[1] = lines[0] + srcRowStride;
+         lines[2] = lines[1] + srcRowStride;
+         lines[3] = lines[2] + srcRowStride;
+         offs += 8 * comps;
+         fxt1_quantize(encoded, lines, comps);
+         /* 128 bits per 8x4 block */
+         encoded += 4;
+      }
+      encoded += destRowStride;
+   }
+
+ cleanUp:
+   free(newSource);
}
/* lookup table for scaling 5 bit colors up to 8 bits */
static const byte _rgb_scale_5[] = {
- 0, 8, 16, 25, 33, 41, 49, 58,
- 66, 74, 82, 90, 99, 107, 115, 123,
- 132, 140, 148, 156, 165, 173, 181, 189,
- 197, 206, 214, 222, 230, 239, 247, 255
+ 0, 8, 16, 25, 33, 41, 49, 58,
+ 66, 74, 82, 90, 99, 107, 115, 123,
+ 132, 140, 148, 156, 165, 173, 181, 189,
+ 197, 206, 214, 222, 230, 239, 247, 255
};
/* lookup table for scaling 6 bit colors up to 8 bits */
static const byte _rgb_scale_6[] = {
- 0, 4, 8, 12, 16, 20, 24, 28,
- 32, 36, 40, 45, 49, 53, 57, 61,
- 65, 69, 73, 77, 81, 85, 89, 93,
- 97, 101, 105, 109, 113, 117, 121, 125,
- 130, 134, 138, 142, 146, 150, 154, 158,
- 162, 166, 170, 174, 178, 182, 186, 190,
- 194, 198, 202, 206, 210, 215, 219, 223,
- 227, 231, 235, 239, 243, 247, 251, 255
+ 0, 4, 8, 12, 16, 20, 24, 28,
+ 32, 36, 40, 45, 49, 53, 57, 61,
+ 65, 69, 73, 77, 81, 85, 89, 93,
+ 97, 101, 105, 109, 113, 117, 121, 125,
+ 130, 134, 138, 142, 146, 150, 154, 158,
+ 162, 166, 170, 174, 178, 182, 186, 190,
+ 194, 198, 202, 206, 210, 215, 219, 223,
+ 227, 231, 235, 239, 243, 247, 251, 255
};
#define UP5(c) _rgb_scale_5[(c) & 31]
#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
#define LERP(n, t, c0, c1) (((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n)
-#define ZERO_4UBV(v) *((dword *)(v)) = 0
static void
fxt1_decode_1HI (const byte *code, int t, byte *rgba)
{
- const dword *cc;
-
- t *= 3;
- cc = (const dword *)(code + t / 8);
- t = (cc[0] >> (t & 7)) & 7;
-
- if (t == 7) {
- ZERO_4UBV(rgba);
- } else {
- cc = (const dword *)(code + 12);
- if (t == 0) {
- rgba[BCOMP] = UP5(CC_SEL(cc, 0));
- rgba[GCOMP] = UP5(CC_SEL(cc, 5));
- rgba[RCOMP] = UP5(CC_SEL(cc, 10));
- } else if (t == 6) {
- rgba[BCOMP] = UP5(CC_SEL(cc, 15));
- rgba[GCOMP] = UP5(CC_SEL(cc, 20));
- rgba[RCOMP] = UP5(CC_SEL(cc, 25));
- } else {
- rgba[BCOMP] = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
- rgba[GCOMP] = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
- rgba[RCOMP] = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
- }
- rgba[ACOMP] = 255;
- }
+ const dword *cc;
+
+ t *= 3; /* texel index -> bit offset of its 3-bit selector */
+ cc = (const dword *)(code + t / 8); /* byte that holds the first selector bit; NOTE(review): this dword read may be unaligned */
+ t = (cc[0] >> (t & 7)) & 7; /* from here on t is the selector, 0..7 */
+
+ if (t == 7) { /* selector 7 decodes to all-zero RGBA */
+ rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
+ } else {
+ byte r, g, b;
+ cc = (const dword *)(code + 12); /* both endpoint colors are fetched relative to byte 12 of the block */
+ if (t == 0) { /* selector 0: first endpoint as is */
+ b = UP5(CC_SEL(cc, 0));
+ g = UP5(CC_SEL(cc, 5));
+ r = UP5(CC_SEL(cc, 10));
+ } else if (t == 6) { /* selector 6: second endpoint as is */
+ b = UP5(CC_SEL(cc, 15));
+ g = UP5(CC_SEL(cc, 20));
+ r = UP5(CC_SEL(cc, 25));
+ } else { /* selectors 1..5: blend the endpoints with weight t/6 */
+ b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
+ g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
+ r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
+ }
+ rgba[RCOMP] = r;
+ rgba[GCOMP] = g;
+ rgba[BCOMP] = b;
+ rgba[ACOMP] = 255; /* every selector other than 7 is fully opaque */
+ }
}
static void
fxt1_decode_1CHROMA (const byte *code, int t, byte *rgba)
{
- const dword *cc;
- dword kk;
-
- cc = (const dword *)code;
- if (t & 16) {
- cc++;
- t &= 15;
- }
- t = (cc[0] >> (t * 2)) & 3;
-
- t *= 15;
- cc = (const dword *)(code + 8 + t / 8);
- kk = cc[0] >> (t & 7);
- rgba[BCOMP] = UP5(kk);
- rgba[GCOMP] = UP5(kk >> 5);
- rgba[RCOMP] = UP5(kk >> 10);
- rgba[ACOMP] = 255;
+ const dword *cc;
+ dword kk;
+
+ cc = (const dword *)code;
+ if (t & 16) { /* texels 16..31 take their selector from the second dword */
+ cc++;
+ t &= 15;
+ }
+ t = (cc[0] >> (t * 2)) & 3; /* from here on t is the 2-bit color selector */
+
+ t *= 15; /* selector -> bit offset of a 15-bit color, counted from byte 8 */
+ cc = (const dword *)(code + 8 + t / 8); /* NOTE(review): for selector 3 this dword read starts at byte 13 and so touches the byte after the 16-byte block */
+ kk = cc[0] >> (t & 7); /* selected color, 5 bits per channel, blue lowest */
+ rgba[BCOMP] = UP5(kk);
+ rgba[GCOMP] = UP5(kk >> 5);
+ rgba[RCOMP] = UP5(kk >> 10);
+ rgba[ACOMP] = 255; /* this mode always decodes to opaque */
}
static void
fxt1_decode_1MIXED (const byte *code, int t, byte *rgba)
{
- const dword *cc;
- int col[2][3];
- int glsb, selb;
-
- cc = (const dword *)code;
- if (t & 16) {
- t &= 15;
- t = (cc[1] >> (t * 2)) & 3;
- /* col 2 */
- col[0][BCOMP] = (*(const dword *)(code + 11)) >> 6;
- col[0][GCOMP] = CC_SEL(cc, 99);
- col[0][RCOMP] = CC_SEL(cc, 104);
- /* col 3 */
- col[1][BCOMP] = CC_SEL(cc, 109);
- col[1][GCOMP] = CC_SEL(cc, 114);
- col[1][RCOMP] = CC_SEL(cc, 119);
- glsb = CC_SEL(cc, 126);
- selb = CC_SEL(cc, 33);
- } else {
- t = (cc[0] >> (t * 2)) & 3;
- /* col 0 */
- col[0][BCOMP] = CC_SEL(cc, 64);
- col[0][GCOMP] = CC_SEL(cc, 69);
- col[0][RCOMP] = CC_SEL(cc, 74);
- /* col 1 */
- col[1][BCOMP] = CC_SEL(cc, 79);
- col[1][GCOMP] = CC_SEL(cc, 84);
- col[1][RCOMP] = CC_SEL(cc, 89);
- glsb = CC_SEL(cc, 125);
- selb = CC_SEL(cc, 1);
- }
-
- if (CC_SEL(cc, 124) & 1) {
- /* alpha[0] == 1 */
-
- if (t == 3) {
- ZERO_4UBV(rgba);
- } else {
- if (t == 0) {
- rgba[BCOMP] = UP5(col[0][BCOMP]);
- rgba[GCOMP] = UP5(col[0][GCOMP]);
- rgba[RCOMP] = UP5(col[0][RCOMP]);
- } else if (t == 2) {
- rgba[BCOMP] = UP5(col[1][BCOMP]);
- rgba[GCOMP] = UP6(col[1][GCOMP], glsb);
- rgba[RCOMP] = UP5(col[1][RCOMP]);
- } else {
- rgba[BCOMP] = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
- rgba[GCOMP] = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
- rgba[RCOMP] = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
- }
- rgba[ACOMP] = 255;
- }
- } else {
- /* alpha[0] == 0 */
-
- if (t == 0) {
- rgba[BCOMP] = UP5(col[0][BCOMP]);
- rgba[GCOMP] = UP6(col[0][GCOMP], glsb ^ selb);
- rgba[RCOMP] = UP5(col[0][RCOMP]);
- } else if (t == 3) {
- rgba[BCOMP] = UP5(col[1][BCOMP]);
- rgba[GCOMP] = UP6(col[1][GCOMP], glsb);
- rgba[RCOMP] = UP5(col[1][RCOMP]);
- } else {
- rgba[BCOMP] = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
- rgba[GCOMP] = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
- UP6(col[1][GCOMP], glsb));
- rgba[RCOMP] = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
- }
- rgba[ACOMP] = 255;
- }
+ const dword *cc;
+ dword col[2][3]; /* the two endpoint colors for the half of the block that holds texel t */
+ int glsb, selb; /* extra green low bit, and a selector bit that is XORed with it below */
+
+ cc = (const dword *)code;
+ if (t & 16) { /* texels 16..31: selectors come from cc[1] */
+ t &= 15;
+ t = (cc[1] >> (t * 2)) & 3;
+ /* col 2 */
+ col[0][BCOMP] = (*(const dword *)(code + 11)) >> 6; /* direct dword read at byte 11 instead of CC_SEL; NOTE(review): may be unaligned */
+ col[0][GCOMP] = CC_SEL(cc, 99);
+ col[0][RCOMP] = CC_SEL(cc, 104);
+ /* col 3 */
+ col[1][BCOMP] = CC_SEL(cc, 109);
+ col[1][GCOMP] = CC_SEL(cc, 114);
+ col[1][RCOMP] = CC_SEL(cc, 119);
+ glsb = CC_SEL(cc, 126);
+ selb = CC_SEL(cc, 33);
+ } else { /* texels 0..15: selectors come from cc[0] */
+ t = (cc[0] >> (t * 2)) & 3;
+ /* col 0 */
+ col[0][BCOMP] = CC_SEL(cc, 64);
+ col[0][GCOMP] = CC_SEL(cc, 69);
+ col[0][RCOMP] = CC_SEL(cc, 74);
+ /* col 1 */
+ col[1][BCOMP] = CC_SEL(cc, 79);
+ col[1][GCOMP] = CC_SEL(cc, 84);
+ col[1][RCOMP] = CC_SEL(cc, 89);
+ glsb = CC_SEL(cc, 125);
+ selb = CC_SEL(cc, 1);
+ }
+
+ if (CC_SEL(cc, 124) & 1) {
+ /* alpha[0] == 1 */
+
+ if (t == 3) {
+ /* zero */
+ rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
+ } else {
+ byte r, g, b;
+ if (t == 0) { /* selector 0: first endpoint, green expanded from 5 bits */
+ b = UP5(col[0][BCOMP]);
+ g = UP5(col[0][GCOMP]);
+ r = UP5(col[0][RCOMP]);
+ } else if (t == 2) { /* selector 2: second endpoint, green expanded from 6 bits using glsb */
+ b = UP5(col[1][BCOMP]);
+ g = UP6(col[1][GCOMP], glsb);
+ r = UP5(col[1][RCOMP]);
+ } else { /* selector 1: truncating average of the two endpoints */
+ b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
+ g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
+ r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
+ }
+ rgba[RCOMP] = r;
+ rgba[GCOMP] = g;
+ rgba[BCOMP] = b;
+ rgba[ACOMP] = 255;
+ }
+ } else {
+ /* alpha[0] == 0 */
+ byte r, g, b;
+ if (t == 0) { /* selector 0: first endpoint, green low bit is glsb ^ selb */
+ b = UP5(col[0][BCOMP]);
+ g = UP6(col[0][GCOMP], glsb ^ selb);
+ r = UP5(col[0][RCOMP]);
+ } else if (t == 3) { /* selector 3: second endpoint, green low bit is glsb */
+ b = UP5(col[1][BCOMP]);
+ g = UP6(col[1][GCOMP], glsb);
+ r = UP5(col[1][RCOMP]);
+ } else { /* selectors 1 and 2: blend the endpoints with weight t/3 */
+ b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
+ g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
+ UP6(col[1][GCOMP], glsb));
+ r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
+ }
+ rgba[RCOMP] = r;
+ rgba[GCOMP] = g;
+ rgba[BCOMP] = b;
+ rgba[ACOMP] = 255; /* this branch never produces a transparent texel */
+ }
}
static void
fxt1_decode_1ALPHA (const byte *code, int t, byte *rgba)
{
- const dword *cc;
-
- cc = (const dword *)code;
- if (CC_SEL(cc, 124) & 1) {
- /* lerp == 1 */
- int col0[4];
-
- if (t & 16) {
- t &= 15;
- t = (cc[1] >> (t * 2)) & 3;
- /* col 2 */
- col0[BCOMP] = (*(const dword *)(code + 11)) >> 6;
- col0[GCOMP] = CC_SEL(cc, 99);
- col0[RCOMP] = CC_SEL(cc, 104);
- col0[ACOMP] = CC_SEL(cc, 119);
- } else {
- t = (cc[0] >> (t * 2)) & 3;
- /* col 0 */
- col0[BCOMP] = CC_SEL(cc, 64);
- col0[GCOMP] = CC_SEL(cc, 69);
- col0[RCOMP] = CC_SEL(cc, 74);
- col0[ACOMP] = CC_SEL(cc, 109);
- }
-
- if (t == 0) {
- rgba[BCOMP] = UP5(col0[BCOMP]);
- rgba[GCOMP] = UP5(col0[GCOMP]);
- rgba[RCOMP] = UP5(col0[RCOMP]);
- rgba[ACOMP] = UP5(col0[ACOMP]);
- } else if (t == 3) {
- rgba[BCOMP] = UP5(CC_SEL(cc, 79));
- rgba[GCOMP] = UP5(CC_SEL(cc, 84));
- rgba[RCOMP] = UP5(CC_SEL(cc, 89));
- rgba[ACOMP] = UP5(CC_SEL(cc, 114));
- } else {
- rgba[BCOMP] = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
- rgba[GCOMP] = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
- rgba[RCOMP] = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
- rgba[ACOMP] = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
- }
- } else {
- /* lerp == 0 */
-
- if (t & 16) {
- cc++;
- t &= 15;
- }
- t = (cc[0] >> (t * 2)) & 3;
-
- if (t == 3) {
- ZERO_4UBV(rgba);
- } else {
- dword kk;
- cc = (const dword *)code;
- rgba[ACOMP] = UP5(cc[3] >> (t * 5 + 13));
- t *= 15;
- cc = (const dword *)(code + 8 + t / 8);
- kk = cc[0] >> (t & 7);
- rgba[BCOMP] = UP5(kk);
- rgba[GCOMP] = UP5(kk >> 5);
- rgba[RCOMP] = UP5(kk >> 10);
- }
- }
+ const dword *cc;
+ byte r, g, b, a; /* assigned by every branch below and stored to rgba once at the end */
+
+ cc = (const dword *)code;
+ if (CC_SEL(cc, 124) & 1) {
+ /* lerp == 1 */
+ dword col0[4]; /* first endpoint (with alpha) for the half that holds texel t; the second endpoint is shared by both halves */
+
+ if (t & 16) { /* texels 16..31: selectors come from cc[1] */
+ t &= 15;
+ t = (cc[1] >> (t * 2)) & 3;
+ /* col 2 */
+ col0[BCOMP] = (*(const dword *)(code + 11)) >> 6; /* direct dword read at byte 11 instead of CC_SEL; NOTE(review): may be unaligned */
+ col0[GCOMP] = CC_SEL(cc, 99);
+ col0[RCOMP] = CC_SEL(cc, 104);
+ col0[ACOMP] = CC_SEL(cc, 119);
+ } else { /* texels 0..15: selectors come from cc[0] */
+ t = (cc[0] >> (t * 2)) & 3;
+ /* col 0 */
+ col0[BCOMP] = CC_SEL(cc, 64);
+ col0[GCOMP] = CC_SEL(cc, 69);
+ col0[RCOMP] = CC_SEL(cc, 74);
+ col0[ACOMP] = CC_SEL(cc, 109);
+ }
+
+ if (t == 0) { /* selector 0: the per-half endpoint as is */
+ b = UP5(col0[BCOMP]);
+ g = UP5(col0[GCOMP]);
+ r = UP5(col0[RCOMP]);
+ a = UP5(col0[ACOMP]);
+ } else if (t == 3) { /* selector 3: the shared endpoint as is */
+ b = UP5(CC_SEL(cc, 79));
+ g = UP5(CC_SEL(cc, 84));
+ r = UP5(CC_SEL(cc, 89));
+ a = UP5(CC_SEL(cc, 114));
+ } else { /* selectors 1 and 2: blend all four channels with weight t/3 */
+ b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
+ g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
+ r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
+ a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
+ }
+ } else {
+ /* lerp == 0 */
+
+ if (t & 16) { /* texels 16..31 take their selector from the second dword */
+ cc++;
+ t &= 15;
+ }
+ t = (cc[0] >> (t * 2)) & 3;
+
+ if (t == 3) {
+ /* zero */
+ r = g = b = a = 0;
+ } else {
+ dword kk;
+ cc = (const dword *)code;
+ a = UP5(cc[3] >> (t * 5 + 13)); /* one alpha per selector, 5 bits apart, starting at bit 13 of cc[3] */
+ t *= 15; /* selector -> bit offset of a 15-bit color, counted from byte 8 */
+ cc = (const dword *)(code + 8 + t / 8);
+ kk = cc[0] >> (t & 7); /* selected color, 5 bits per channel, blue lowest */
+ b = UP5(kk);
+ g = UP5(kk >> 5);
+ r = UP5(kk >> 10);
+ }
+ }
+ rgba[RCOMP] = r;
+ rgba[GCOMP] = g;
+ rgba[BCOMP] = b;
+ rgba[ACOMP] = a;
}
TAPI void TAPIENTRY
-fxt1_decode_1 (const void *texture, int stride,
- int i, int j, byte *rgba)
+fxt1_decode_1 (const void *texture, int stride, /* in pixels */
+ int i, int j, byte *rgba)
{
- static void (*decode_1[]) (const byte *, int, byte *) = {
- fxt1_decode_1HI, /* cc-high = "00?" */
- fxt1_decode_1HI, /* cc-high = "00?" */
- fxt1_decode_1CHROMA, /* cc-chroma = "010" */
- fxt1_decode_1ALPHA, /* alpha = "011" */
- fxt1_decode_1MIXED, /* mixed = "1??" */
- fxt1_decode_1MIXED, /* mixed = "1??" */
- fxt1_decode_1MIXED, /* mixed = "1??" */
- fxt1_decode_1MIXED /* mixed = "1??" */
- };
-
- const byte *code = (const byte *)texture +
- ((j / 4) * (stride / 8) + (i / 8)) * 16;
- int mode = CC_SEL(code, 125);
- int t = i & 7;
-
- if (t & 4) {
- t += 12;
- }
- t += (j & 3) * 4;
-
- decode_1[mode](code, t, rgba);
-
-#if VERBOSE
- {
- extern int cc_chroma;
- extern int cc_alpha;
- extern int cc_high;
- extern int cc_mixed;
- static int *cctype[] = {
- &cc_high,
- &cc_high,
- &cc_chroma,
- &cc_alpha,
- &cc_mixed,
- &cc_mixed,
- &cc_mixed,
- &cc_mixed
- };
- (*cctype[mode])++;
- }
-#endif
+ static void (*decode_1[]) (const byte *, int, byte *) = {
+ fxt1_decode_1HI, /* cc-high = "00?" */
+ fxt1_decode_1HI, /* cc-high = "00?" */
+ fxt1_decode_1CHROMA, /* cc-chroma = "010" */
+ fxt1_decode_1ALPHA, /* alpha = "011" */
+ fxt1_decode_1MIXED, /* mixed = "1??" */
+ fxt1_decode_1MIXED, /* mixed = "1??" */
+ fxt1_decode_1MIXED, /* mixed = "1??" */
+ fxt1_decode_1MIXED /* mixed = "1??" */
+ };
+
+ const byte *code = (const byte *)texture +
+ ((j / 4) * (stride / 8) + (i / 8)) * 16; /* 16 bytes per block; one block spans 8 texels in i and 4 in j */
+ int mode = CC_SEL(code, 125); /* index into decode_1[] above; the table has 8 entries, so this presumably yields a 3-bit value */
+ int t = i & 7; /* column of the texel inside its block */
+
+ if (t & 4) {
+ t += 12; /* columns 4..7 belong to the second group of 16 texels */
+ }
+ t += (j & 3) * 4; /* four texels per row within a group */
+
+ decode_1[mode](code, t, rgba); /* the per-mode decoder writes the four components into rgba */
}
#ifndef FXT1_H_included
#define FXT1_H_included
-TAPI int TAPIENTRY
-fxt1_encode (int width, int height, int comps,
+TAPI void TAPIENTRY
+fxt1_encode (unsigned int width, unsigned int height, int comps,
const void *source, int srcRowStride,
void *dest, int destRowStride);
TAPI void TAPIENTRY
-fxt1_decode_1 (const void *texture, int stride /* in pixels */,
- int i, int j, byte *rgba);
+fxt1_decode_1 (const void *texture, int stride,
+ int i, int j, byte *rgba);
#endif
#ifndef INTERNAL_H_included
#define INTERNAL_H_included
+#include <stdint.h>
+
/*****************************************************************************\
* DLL stuff
\*****************************************************************************/
* 64bit types on 32bit machine
\*****************************************************************************/
-#if (defined(__GNUC__) && !defined(__cplusplus)) || defined(__MSC__)
+/*
+ * Define a 64-bit unsigned integer type and macros
+ */
+#if 1
+
+#define Q_NATIVE 1
-typedef unsigned long long qword;
+typedef uint64_t qword;
#define Q_MOV32(a, b) a = b
#define Q_OR32(a, b) a |= b
#define Q_SHL(a, c) a <<= c
-#else /* !__GNUC__ */
+#else
+
+#define Q_NATIVE 0
typedef struct {
- dword lo, hi;
+ dword lo, hi;
} qword;
#define Q_MOV32(a, b) a.lo = b
#define Q_OR32(a, b) a.lo |= b
-#define Q_SHL(a, c) \
- do { \
- if ((c) >= 32) { \
- a.hi = a.lo << ((c) - 32); \
- a.lo = 0; \
- } else { \
- a.hi = (a.hi << (c)) | (a.lo >> (32 - (c)));\
- a.lo <<= c; \
- } \
- } while (0)
-#endif /* !__GNUC__ */
+#define Q_SHL(a, c) \
+ do { \
+ if ((c) >= 32) { \
+ a.hi = a.lo << ((c) - 32); \
+ a.lo = 0; \
+ } else { \
+ a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
+ a.lo <<= (c); \
+ } \
+ } while (0)
+
+#endif
/*****************************************************************************\
#define F(i) (float)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
#define SAFECDOT 1 /* for paranoids */
-#define MAKEIVEC(NV, NC, IV, B, V0, V1) \
- do { \
- /* compute interpolation vector */\
- float d2 = 0.0F; \
- float rd2; \
- \
- for (i = 0; i < NC; i++) { \
- IV[i] = (V1[i] - V0[i]) * F(i);\
- d2 += IV[i] * IV[i]; \
- } \
- rd2 = (float)NV / d2; \
- B = 0; \
- for (i = 0; i < NC; i++) { \
- IV[i] *= F(i); \
- B -= IV[i] * V0[i]; \
- IV[i] *= rd2; \
- } \
- B = B * rd2 + 0.5F; \
- } while (0)
+#define MAKEIVEC(NV, NC, IV, B, V0, V1) \
+ do { \
+ /* compute interpolation vector */ \
+ float d2 = 0.0F; \
+ float rd2; \
+ \
+ for (i = 0; i < NC; i++) { \
+ IV[i] = (V1[i] - V0[i]) * F(i); \
+ d2 += IV[i] * IV[i]; \
+ } \
+ rd2 = (float)NV / d2; \
+ B = 0; \
+ for (i = 0; i < NC; i++) { \
+ IV[i] *= F(i); \
+ B -= IV[i] * V0[i]; \
+ IV[i] *= rd2; \
+ } \
+ B = B * rd2 + 0.5f; \
+ } while (0)
#define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
- do { \
- float dot = 0.0F; \
- for (i = 0; i < NC; i++) { \
- dot += V[i] * IV[i]; \
- } \
- TEXEL = (int)(dot + B); \
- if (SAFECDOT) { \
- if (TEXEL < 0) { \
- TEXEL = 0; \
- } else if (TEXEL > NV) { \
- TEXEL = NV; \
- } \
- } \
- } while (0)
+ do { \
+ float dot = 0.0F; \
+ for (i = 0; i < NC; i++) { \
+ dot += V[i] * IV[i]; \
+ } \
+ TEXEL = (int)(dot + B); \
+ if (SAFECDOT) { \
+ if (TEXEL < 0) { \
+ TEXEL = 0; \
+ } else if (TEXEL > NV) { \
+ TEXEL = NV; \
+ } \
+ } \
+ } while (0)
/*****************************************************************************\
* Utility functions
\*****************************************************************************/
-void
-_mesa_upscale_teximage2d (unsigned int inWidth, unsigned int inHeight,
- unsigned int outWidth, unsigned int outHeight,
- unsigned int comps,
- const byte *src, int srcRowStride,
- unsigned char *dest);
+/** Copy a 4-element vector: elements 0..3 of SRC are assigned to DST one by one, so DST and SRC are each evaluated four times */
+#define COPY_4V( DST, SRC ) \
+do { \
+ (DST)[0] = (SRC)[0]; \
+ (DST)[1] = (SRC)[1]; \
+ (DST)[2] = (SRC)[2]; \
+ (DST)[3] = (SRC)[3]; \
+} while (0)
+
+/** Copy a 4-element unsigned byte vector: a single 32-bit load/store when __i386__ is defined, four byte assignments via COPY_4V otherwise */
+static inline void
+COPY_4UBV(uint8_t dst[4], const uint8_t src[4])
+{
+#if defined(__i386__)
+ *((uint32_t *) dst) = *((uint32_t *) src); /* the cast also drops the const qualifier of src */
+#else
+ /* The uint32_t cast might fail if DST or SRC are not dword-aligned (RISC) */
+ COPY_4V(dst, src);
+#endif
+}
+
+void reorder_source_3(byte *tex, dword width, dword height, int srcRowStride);
+void *reorder_source_3_alloc(const byte *source, dword width, dword height, int srcRowStride);
+void reorder_source_4(byte *tex, dword width, dword height, int srcRowStride);
+void *reorder_source_4_alloc(const byte *source, dword width, dword height, int srcRowStride);
#endif
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
-/* Copyright (C) 2007 Hiroshi Morii <koolsmoky(at)users.sourceforge.net>
- * _mesa_upscale_teximage2d speedup
- */
-
#include <assert.h>
+#include <string.h>
+#include <stdlib.h>
#include "types.h"
#include "internal.h"
+/*
+ * Swap bytes 0 and 2 of every 3-byte texel in place.
+ * Rows start srcRowStride bytes apart; width texels are processed per row.
+ */
+void reorder_source_3(byte *tex, dword width, dword height, int srcRowStride)
+{
+    dword row, col;
+
+    for (row = 0; row < height; row++) {
+        byte *texel = tex + srcRowStride * row;
+
+        for (col = 0; col < width; col++, texel += 3) {
+            const byte first = texel[0];
+
+            texel[0] = texel[2];
+            texel[2] = first;
+        }
+    }
+}
-void
-_mesa_upscale_teximage2d (unsigned int inWidth, unsigned int inHeight,
- unsigned int outWidth, unsigned int outHeight,
- unsigned int comps,
- const byte *src, int srcRowStride,
- byte *dest)
+/*
+ * Return a malloc'ed copy of `source` (height * srcRowStride bytes) that has
+ * had reorder_source_3() applied to it, or NULL when the allocation fails.
+ * `source` itself is left untouched.  The caller releases the buffer with
+ * free() (tx_compress_dxtn_rgba does so).
+ */
+void *reorder_source_3_alloc(const byte *source, dword width, dword height, int srcRowStride)
{
- unsigned int i, j, k;
+ byte *tex;
- assert(outWidth >= inWidth);
- assert(outHeight >= inHeight);
+ tex = malloc(height * srcRowStride);
+ if (!tex)
+ goto out;
-#if 1 /* H.Morii - faster loops */
- for (i = 0; i < inHeight; i++) {
- for (j = 0; j < inWidth; j++) {
- const int aa = (i * outWidth + j) * comps;
- const int bb = i * srcRowStride + j * comps;
- for (k = 0; k < comps; k++) {
- dest[aa + k] = src[bb + k];
- }
- }
- for (; j < outWidth; j++) {
- const int aa = (i * outWidth + j) * comps;
- const int bb = i * srcRowStride + (j - inWidth) * comps;
- for (k = 0; k < comps; k++) {
- dest[aa + k] = src[bb + k];
- }
- }
- }
- for (; i < outHeight; i++) {
- for (j = 0; j < inWidth; j++) {
- const int aa = (i * outWidth + j) * comps;
- const int bb = (i - inHeight) * srcRowStride + j * comps;
- for (k = 0; k < comps; k++) {
- dest[aa + k] = src[bb + k];
- }
- }
- for (; j < outWidth; j++) {
- const int aa = (i * outWidth + j) * comps;
- const int bb = (i - inHeight) * srcRowStride + (j - inWidth) * comps;
- for (k = 0; k < comps; k++) {
- dest[aa + k] = src[bb + k];
- }
- }
- }
-#else
- for (i = 0; i < outHeight; i++) {
- const int ii = i % inHeight;
- for (j = 0; j < outWidth; j++) {
- const int jj = j % inWidth;
- const int aa = (i * outWidth + j) * comps;
- const int bb = ii * srcRowStride + jj * comps;
- for (k = 0; k < comps; k++) {
- dest[aa + k] = src[bb + k];
- }
- }
+ /* work on a private copy so the const input is never modified */
+ memcpy(tex, source, height * srcRowStride);
+ reorder_source_3(tex, width, height, srcRowStride);
+
+out:
+ return tex;
+}
+
+/*
+ * Swap bytes 0 and 2 of every 4-byte texel in place; bytes 1 and 3 are left
+ * alone.  Rows start srcRowStride bytes apart and width texels are processed
+ * per row.
+ */
+void reorder_source_4(byte *tex, dword width, dword height, int srcRowStride)
+{
+ byte *line;
+ byte t;
+ dword i, j;
+
+ for (i = 0; i < height; i++) {
+ line = &tex[srcRowStride * i];
+ for (j = 0; j < width; j++) {
+ t = line[2];
+ line[2] = line[0];
+ line[0] = t;
+ /* advance to the next 4-byte texel */
+ line += 4;
+ }
}
-#endif
+}
+
+/*
+ * Duplicate `source` (height * srcRowStride bytes) into a freshly malloc'ed
+ * buffer and run reorder_source_4() over the duplicate.
+ * Returns the new buffer, or NULL if malloc fails.
+ */
+void *reorder_source_4_alloc(const byte *source, dword width, dword height, int srcRowStride)
+{
+    byte *copy = malloc(height * srcRowStride);
+
+    if (copy) {
+        memcpy(copy, source, height * srcRowStride);
+        reorder_source_4(copy, width, height, srcRowStride);
+    }
+
+    return copy;
}
#include <assert.h>
+#include <stdlib.h>
#include "types.h"
#include "internal.h"
-#include "dxtn.h"
+#include <SDL_opengl.h>
+#include "../../Glide64/m64p.h"
+typedef void (*dxtCompressTexFuncExt)(GLint srccomps, GLint width, GLint height,
+ const GLubyte *srcPixData, GLenum destformat,
+ GLubyte *dest, GLint dstRowStride);
+static dxtCompressTexFuncExt _tx_compress_dxtn = NULL;
-#define GL_COMPRESSED_RGB_S3TC_DXT1_EXT 0x83F0
-#define GL_COMPRESSED_RGBA_S3TC_DXT1_EXT 0x83F1
-#define GL_COMPRESSED_RGBA_S3TC_DXT3_EXT 0x83F2
-#define GL_COMPRESSED_RGBA_S3TC_DXT5_EXT 0x83F3
+#ifdef TXCDXTN_EXTERNAL
+#include "../../Glide64/osal_dynamiclib.h"
-TAPI void TAPIENTRY
-fetch_2d_texel_rgb_dxt1 (int texImage_RowStride,
- const byte *texImage_Data,
- int i, int j,
- byte *texel)
-{
- dxt1_rgb_decode_1(texImage_Data, texImage_RowStride, i, j, texel);
-}
+#if defined(_WIN32) || defined(WIN32)
+#define DXTN_LIBNAME "dxtn.dll"
+#elif defined(__DJGPP__)
+#define DXTN_LIBNAME "dxtn.dxe"
+#else
+#define DXTN_LIBNAME "libtxc_dxtn.so"
+#endif
+static m64p_dynlib_handle dxtn_lib_handle;
-TAPI void TAPIENTRY
-fetch_2d_texel_rgba_dxt1 (int texImage_RowStride,
- const byte *texImage_Data,
- int i, int j,
- byte *texel)
+/*
+ * Lazily bind _tx_compress_dxtn to the "tx_compress_dxtn" symbol exported by
+ * the external library DXTN_LIBNAME.  Does nothing once the pointer is set.
+ * On any failure a warning is logged and _tx_compress_dxtn stays NULL, so the
+ * caller must check the pointer afterwards.
+ */
+static void tx_compress_dxtn_init()
{
- dxt1_rgba_decode_1(texImage_Data, texImage_RowStride, i, j, texel);
-}
+ m64p_error rval;
+ /* already resolved by an earlier call */
+ if (_tx_compress_dxtn)
+ return;
-TAPI void TAPIENTRY
-fetch_2d_texel_rgba_dxt3 (int texImage_RowStride,
- const byte *texImage_Data,
- int i, int j,
- byte *texel)
-{
- dxt3_rgba_decode_1(texImage_Data, texImage_RowStride, i, j, texel);
+ rval = osal_dynlib_open(&dxtn_lib_handle, DXTN_LIBNAME);
+ if (rval != M64ERR_SUCCESS) {
+ WriteLog(M64MSG_WARNING, "Failed to open %s", DXTN_LIBNAME);
+ return;
+ }
+
+ _tx_compress_dxtn = osal_dynlib_getproc(dxtn_lib_handle, "tx_compress_dxtn");
+ if (!_tx_compress_dxtn) {
+ /* one %s per argument: report the symbol that was actually looked up */
+ WriteLog(M64MSG_WARNING, "Shared library '%s' invalid; no %s() function found.", DXTN_LIBNAME, "tx_compress_dxtn");
+ osal_dynlib_close(dxtn_lib_handle);
+ return;
+ }
}
+#else
-TAPI void TAPIENTRY
-fetch_2d_texel_rgba_dxt5 (int texImage_RowStride,
- const byte *texImage_Data,
- int i, int j,
- byte *texel)
+#include "s2tc/txc_dxtn.h"
+
+/*
+ * Built-in variant (TXCDXTN_EXTERNAL not defined): point _tx_compress_dxtn
+ * straight at tx_compress_dxtn, presumably the one declared by the
+ * s2tc/txc_dxtn.h include above -- TODO confirm.
+ */
+static void tx_compress_dxtn_init()
{
- dxt5_rgba_decode_1(texImage_Data, texImage_RowStride, i, j, texel);
+ _tx_compress_dxtn = tx_compress_dxtn;
}
+#endif
+
+/*
+ * Compress a width x height image of srccomps (3 or 4) bytes per texel into
+ * `dest` using the compressor bound by tx_compress_dxtn_init().
+ *
+ * The compressor is fed a temporary copy of `source` in which bytes 0 and 2
+ * of every texel are swapped; `source` itself is not modified.  If the
+ * compressor cannot be initialised or the temporary copy cannot be created,
+ * an error is logged and `dest` is left untouched.
+ */
TAPI void TAPIENTRY
-tx_compress_dxtn (int srccomps, int width, int height,
- const byte *source, int destformat, byte *dest,
- int destRowStride)
+tx_compress_dxtn_rgba(int srccomps, int width, int height,
+ const byte *source, int destformat, byte *dest,
+ int destRowStride)
{
int srcRowStride = width * srccomps;
+ void *newSource = NULL;
- switch (destformat) {
- case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
- dxt1_rgb_encode(width, height, srccomps,
- source, srcRowStride,
- dest, destRowStride);
- break;
- case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
- dxt1_rgba_encode(width, height, srccomps,
- source, srcRowStride,
- dest, destRowStride);
- break;
- case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
- dxt3_rgba_encode(width, height, srccomps,
- source, srcRowStride,
- dest, destRowStride);
- break;
- case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
- dxt5_rgba_encode(width, height, srccomps,
- source, srcRowStride,
- dest, destRowStride);
- break;
- default:
- assert(0);
+ tx_compress_dxtn_init();
+ if (!_tx_compress_dxtn) {
+ WriteLog(M64MSG_ERROR, "Failed to initialize S3TC compressor");
+ return;
}
+
+ assert(srccomps == 3 || srccomps == 4);
+
+ if (srccomps == 3)
+ newSource = reorder_source_3_alloc(source, width, height, srcRowStride);
+ if (srccomps == 4)
+ newSource = reorder_source_4_alloc(source, width, height, srcRowStride);
+
+ /* NULL here means malloc failed, or srccomps was unsupported in a build
+  * with assertions disabled; never hand a NULL image to the compressor */
+ if (!newSource) {
+ WriteLog(M64MSG_ERROR, "Failed to allocate reordered source for S3TC compressor");
+ return;
+ }
+
+ _tx_compress_dxtn(srccomps, width, height, newSource, destformat, dest,
+ destRowStride);
+
+ free(newSource);
}
SHADER_HEADER
"#define Z_MAX 65536.0 \n"
"attribute highp vec4 aVertex; \n"
-"attribute mediump vec4 aColor; \n" //*SEB* highp -> lowp
+"attribute mediump vec4 aColor; \n" //*SEB* highp -> mediump
"attribute highp vec4 aMultiTexCoord0; \n"
"attribute highp vec4 aMultiTexCoord1; \n"
"attribute float aFog; \n"
int log_length;
//#ifndef ANDROID
+#if 0
+// Unfortunately, the Pandora does not have the gl_FragDepthEXT extension, so this block is disabled.
// depth shader
fragment_depth_shader_object = glCreateShader(GL_FRAGMENT_SHADER);
- char s[128];
+ char s[512];
// ZIGGY convert a 565 texture into depth component
- sprintf(s, "gl_FragDepth = dot(texture2D(texture0, vec2(gl_TexCoord[0])), vec4(31*64*32, 63*32, 31, 0))*%g + %g; \n", zscale/2/65535.0, 1-zscale/2);
+ sprintf(s, "gl_FragDepthEXT = dot(texture2D(texture0, vec2(gl_TexCoord[0])), vec4(31*64*32, 63*32, 31, 0))*%g + %g; \n", zscale/2/65535.0, 1-zscale/2);
fragment_shader = (char*)malloc(strlen(fragment_shader_header)+
strlen(s)+
strlen(fragment_shader_end)+1);
glCompileShader(fragment_depth_shader_object);
check_compile(fragment_depth_shader_object);
-//#endif
+#endif
// default shader
fragment_shader_object = glCreateShader(GL_FRAGMENT_SHADER);
// ZIGGY viewport_offset is WIN32 specific, with SDL just set it to zero
viewport_offset = 0; //-10 //-20;
- // ZIGGY not sure, but it might be better to let the system choose
+ CoreVideo_Init();
CoreVideo_GL_SetAttribute(M64P_GL_DOUBLEBUFFER, 1);
CoreVideo_GL_SetAttribute(M64P_GL_SWAP_CONTROL, vsync);
CoreVideo_GL_SetAttribute(M64P_GL_BUFFER_SIZE, 16);
//SDL_QuitSubSystem(SDL_INIT_VIDEO);
//sleep(2);
#endif
+
+ CoreVideo_Quit();
+
return FXTRUE;
}
/* Check for OS */
#if defined(__IRIX__) || defined(__sparc__) || defined(__linux__) || \
- defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
+ defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
+ defined(__FreeBSD_kernel__) || defined(__GNU__)
# define GLIDE_OS GLIDE_OS_UNIX
#elif defined(__DOS__)
# define GLIDE_OS GLIDE_OS_DOS32
#define LOG(...) // WriteLog(M64MSG_VERBOSE, __VA_ARGS__)
#define LOGINFO(...) WriteLog(M64MSG_INFO, __VA_ARGS__)
+#ifdef __cplusplus
+extern "C" {
+#endif
void WriteLog(m64p_msg_level level, const char *msg, ...);
+#ifdef __cplusplus
+}
+#endif
#ifndef _WIN32
- Add tlut support for 16bit textures. Remove hacks.
- Add trapezoid support to le_triangle. Remove hacks.
- Reduce "Known issues" list :)
-- Port over C ports of NASM functions from balrog's fork
GlideHQ
- Add OpenGL texture format support.
endif
# base CFLAGS, LDLIBS, and LDFLAGS
-OPTFLAGS ?= -Ofast -ffast-math -flto -fuse-linker-plugin
+OPTFLAGS ?= -O4 -ffast-math
+#-flto -fuse-linker-plugin
WARNFLAGS ?= -Wall
CFLAGS += $(OPTFLAGS) $(WARNFLAGS) -ffast-math -fno-strict-aliasing -fvisibility=hidden -I../../src
CXXFLAGS += -fvisibility-inlines-hidden
LDLIBS += -ldl
endif
ifeq ($(OS), OSX)
- # Select the proper SDK
- # Also, SDKs are stored in a different location since XCode 4.3
- OSX_SDK ?= $(shell sw_vers -productVersion | cut -f1 -f2 -d .)
- OSX_XCODEMAJ = $(shell xcodebuild -version | grep '[0-9]*\.[0-9]*' | cut -f2 -d ' ' | cut -f1 -d .)
- OSX_XCODEMIN = $(shell xcodebuild -version | grep '[0-9]*\.[0-9]*' | cut -f2 -d ' ' | cut -f2 -d .)
- OSX_XCODEGE43 = $(shell echo "`expr $(OSX_XCODEMAJ) \>= 4``expr $(OSX_XCODEMIN) \>= 3`")
- ifeq ($(OSX_XCODEGE43), 11)
- OSX_SYSROOT := /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs
- else
- OSX_SYSROOT := /Developer/SDKs
- endif
+  # xcode-select has been around since XCode 3.0, i.e. OS X 10.5
+  # Simple (:=) assignment: run the shell commands once while the makefile is
+  # parsed instead of every time a variable that references them is expanded.
+  OSX_SDK_ROOT := $(shell xcode-select -print-path)/Platforms/MacOSX.platform/Developer/SDKs
+  # Use the last SDK directory that `ls` lists under OSX_SDK_ROOT.
+  OSX_SDK_PATH := $(OSX_SDK_ROOT)/$(shell ls $(OSX_SDK_ROOT) | tail -1)
ifeq ($(CPU), X86)
ifeq ($(ARCH_DETECTED), 64BITS)
- CFLAGS += -pipe -arch x86_64 -mmacosx-version-min=$(OSX_SDK) -isysroot $(OSX_SYSROOT)/MacOSX$(OSX_SDK).sdk
+ CFLAGS += -pipe -arch x86_64 -mmacosx-version-min=10.5 -isysroot $(OSX_SDK_PATH)
LDLIBS += -ldl
else
- CFLAGS += -pipe -mmmx -msse -fomit-frame-pointer -arch i686 -mmacosx-version-min=$(OSX_SDK) -isysroot $(OSX_SYSROOT)/MacOSX$(OSX_SDK).sdk
+ CFLAGS += -pipe -mmmx -msse -fomit-frame-pointer -arch i686 -mmacosx-version-min=10.5 -isysroot $(OSX_SDK_PATH)
LDLIBS += -ldl -read_only_relocs suppress
endif
endif
rebuild: clean all
# build dependency files
-CFLAGS += -MD
+CFLAGS += -MD -MP
-include $(OBJECTS:.o=.d)
# standard build rules
|M64TYPE_STRING
|Path to a directory to search when looking for shared data files in the <tt>ConfigGetSharedDataFilepath()</tt> function.
|-
+|CountPerOp
+|M64TYPE_INT
+|Force number of cycles per emulated instruction when set greater than 0.
+|-
+|DelaySI
+|M64TYPE_BOOL
+|Delay interrupt after DMA SI read/write.
+|-
|}
These configuration parameters are used in the Core's event loop to detect keyboard and joystick commands. They are stored in a configuration section called "CoreEvents" and may be altered by the front-end in order to adjust the behaviour of the emulator. These may be adjusted at any time and the effect of the change should occur immediately. The Keysym value stored is actually <tt>(SDLMod << 16) | SDLKey</tt>, so that keypresses with modifiers like shift, control, or alt may be used.
LDFLAGS += -Wl,-version-script,$(SRCDIR)/api/api_export.ver
endif
ifeq ($(OS), OSX)
- # Select the proper SDK
- # Also, SDKs are stored in a different location since XCode 4.3
- OSX_SDK ?= $(shell sw_vers -productVersion | cut -f1 -f2 -d .)
- OSX_XCODEMAJ = $(shell xcodebuild -version | grep '[0-9]*\.[0-9]*' | cut -f2 -d ' ' | cut -f1 -d .)
- OSX_XCODEMIN = $(shell xcodebuild -version | grep '[0-9]*\.[0-9]*' | cut -f2 -d ' ' | cut -f2 -d .)
- OSX_XCODEGE43 = $(shell echo "`expr $(OSX_XCODEMAJ) \>= 4``expr $(OSX_XCODEMIN) \>= 3`")
- ifeq ($(OSX_XCODEGE43), 11)
- OSX_SYSROOT := /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs
- else
- OSX_SYSROOT := /Developer/SDKs
- endif
+  # xcode-select has been around since XCode 3.0, i.e. OS X 10.5
+  # Simple (:=) assignment: run the shell commands once while the makefile is
+  # parsed instead of every time a variable that references them is expanded.
+  OSX_SDK_ROOT := $(shell xcode-select -print-path)/Platforms/MacOSX.platform/Developer/SDKs
+  # Use the last SDK directory that `ls` lists under OSX_SDK_ROOT.
+  OSX_SDK_PATH := $(OSX_SDK_ROOT)/$(shell ls $(OSX_SDK_ROOT) | tail -1)
TARGET = libmupen64plus$(POSTFIX).dylib
LDFLAGS += -bundle -read_only_relocs suppress
LDLIBS += -ldl
ifeq ($(CPU), X86)
ifeq ($(ARCH_DETECTED), 64BITS)
- CFLAGS += -pipe -arch x86_64 -mmacosx-version-min=$(OSX_SDK) -isysroot $(OSX_SYSROOT)/MacOSX$(OSX_SDK).sdk
+ CFLAGS += -pipe -arch x86_64 -mmacosx-version-min=10.5 -isysroot $(OSX_SDK_PATH)
else
- CFLAGS += -pipe -mmmx -msse -arch i686 -mmacosx-version-min=$(OSX_SDK) -isysroot $(OSX_SYSROOT)/MacOSX$(OSX_SDK).sdk
+ CFLAGS += -pipe -mmmx -msse -arch i686 -mmacosx-version-min=10.5 -isysroot $(OSX_SDK_PATH)
ifneq ($(PROFILE), 1)
CFLAGS += -fomit-frame-pointer
endif
$(RM) -r $(TARGET) $(SONAME) $(OBJDIR)
# build dependency files
-CFLAGS += -MD
+CFLAGS += -MD -MP
-include $(OBJECTS:.o=.d)
CXXFLAGS += $(CFLAGS)
#if !defined(NO_ASM) && (defined(__i386__) || defined(__x86_64__))
+/* we must define PACKAGE so that bfd.h (which is included from dis-asm.h) doesn't throw an error */
+#define PACKAGE "mupen64plus-core"
#include <dis-asm.h>
#include <stdarg.h>
int execute_next;
// If game is Zelda OOT, apply subscreen delay fix
- if (strncmp((char *)ROM_HEADER.Name, "THE LEGEND OF ZELDA", 19) == 0 && entry == ENTRY_VI) {
+ if (entry == ENTRY_VI && strncmp((char *)ROM_HEADER.Name, "THE LEGEND OF ZELDA", 19) == 0) {
+ uint32_t subscreen_address = 0;
+ uint32_t credits_address[4];
+ credits_address[0] = 0;
if (sl(ROM_HEADER.CRC1) == 0xEC7011B7 && sl(ROM_HEADER.CRC2) == 0x7616D72B) {
// Legend of Zelda, The - Ocarina of Time (U) + (J) (V1.0)
- execute_cheat(0x801DA5CB, 0x0002, NULL);
+ subscreen_address = 0x801DA5CB;
} else if (sl(ROM_HEADER.CRC1) == 0xD43DA81F && sl(ROM_HEADER.CRC2) == 0x021E1E19) {
// Legend of Zelda, The - Ocarina of Time (U) + (J) (V1.1)
- execute_cheat(0x801DA78B, 0x0002, NULL);
+ subscreen_address = 0x801DA78B;
} else if (sl(ROM_HEADER.CRC1) == 0x693BA2AE && sl(ROM_HEADER.CRC2) == 0xB7F14E9F) {
// Legend of Zelda, The - Ocarina of Time (U) + (J) (V1.2)
- execute_cheat(0x801DAE8B, 0x0002, NULL);
+ subscreen_address = 0x801DAE8B;
} else if (sl(ROM_HEADER.CRC1) == 0xB044B569 && sl(ROM_HEADER.CRC2) == 0x373C1985) {
// Legend of Zelda, The - Ocarina of Time (E) (V1.0)
- execute_cheat(0x801D860B, 0x0002, NULL);
+ subscreen_address = 0x801D860B;
} else if (sl(ROM_HEADER.CRC1) == 0xB2055FBD && sl(ROM_HEADER.CRC2) == 0x0BAB4E0C) {
// Legend of Zelda, The - Ocarina of Time (E) (V1.1)
- execute_cheat(0x801D864B, 0x0002, NULL);
+ subscreen_address = 0x801D864B;
+ // GC Versions such as Master Quest also require the End Credits Fix.
+ } else if (sl(ROM_HEADER.CRC1) == 0x1D4136F3 && sl(ROM_HEADER.CRC2) == 0xAF63EEA9) {
+ // Legend of Zelda, The - Ocarina of Time - Master Quest (E) (GC Version)
+ subscreen_address = 0x801D8F4B;
+ credits_address[0] = 0xD109A8C4;
+ credits_address[1] = 0x8109A8C4;
+ credits_address[2] = 0xD109A8C6;
+ credits_address[3] = 0x8109A8C6;
+ } else if (sl(ROM_HEADER.CRC1) == 0x09465AC3 && sl(ROM_HEADER.CRC2) == 0xF8CB501B) {
+ // Legend of Zelda, The - Ocarina of Time (E) (GC Version)
+ subscreen_address = 0x801D8F8B;
+ credits_address[0] = 0xD109A8E4;
+ credits_address[1] = 0x8109A8E4;
+ credits_address[2] = 0xD109A8E6;
+ credits_address[3] = 0x8109A8E6;
+ } else if (sl(ROM_HEADER.CRC1) == 0xF3DD35BA && sl(ROM_HEADER.CRC2) == 0x4152E075) {
+ // Legend of Zelda, The - Ocarina of Time (U) (GC Version)
+ subscreen_address = 0x801DB78B;
+ credits_address[0] = 0xD109A814;
+ credits_address[1] = 0x8109A814;
+ credits_address[2] = 0xD109A816;
+ credits_address[3] = 0x8109A816;
+ } else if (sl(ROM_HEADER.CRC1) == 0xF034001A && sl(ROM_HEADER.CRC2) == 0xAE47ED06) {
+ // Legend of Zelda, The - Ocarina of Time - Master Quest (U) (GC Version)
+ subscreen_address = 0x801DB74B;
+ credits_address[0] = 0xD109A7F4;
+ credits_address[1] = 0x8109A7F4;
+ credits_address[2] = 0xD109A7F6;
+ credits_address[3] = 0x8109A7F6;
+ } else if (sl(ROM_HEADER.CRC1) == 0xF7F52DB8 && sl(ROM_HEADER.CRC2) == 0x2195E636) {
+ // Zelda no Densetsu - Toki no Ocarina - Zelda Collection Version (J) (GC Version)
+ subscreen_address = 0x801DB78B;
+ credits_address[0] = 0xD109A814;
+ credits_address[1] = 0x8109A814;
+ credits_address[2] = 0xD109A816;
+ credits_address[3] = 0x8109A816;
+ } else if (sl(ROM_HEADER.CRC1) == 0xF611F4BA && sl(ROM_HEADER.CRC2) == 0xC584135C) {
+ // Zelda no Densetsu - Toki no Ocarina GC (J) (GC Version)
+ subscreen_address = 0x801DB78B;
+ credits_address[0] = 0xD109A834;
+ credits_address[1] = 0x8109A834;
+ credits_address[2] = 0xD109A836;
+ credits_address[3] = 0x8109A836;
+ } else if (sl(ROM_HEADER.CRC1) == 0xF43B45BA && sl(ROM_HEADER.CRC2) == 0x2F0E9B6F) {
+ // Zelda no Densetsu - Toki no Ocarina GC Ura (J) (GC Version)
+ subscreen_address = 0x801DB78B;
+ credits_address[0] = 0xD109A814;
+ credits_address[1] = 0x8109A814;
+ credits_address[2] = 0xD109A816;
+ credits_address[3] = 0x8109A816;
} else {
- // Legend of Zelda, The - Ocarina of Time Master Quest
- execute_cheat(0x801D8F4B, 0x0002, NULL);
+ // UNKNOWN VERSION
+ DebugMessage(M64MSG_WARNING, "Warning: Ocarina of Time version could not be determined. No fixes applied.");
+ }
+        /* Apply the fixes selected above for the detected ROM version. */
+        if (subscreen_address) {
+            execute_cheat(subscreen_address, 0x0002, NULL);
+            if (credits_address[0]) {
+                /* Each write depends on the check before it.  A stray ';'
+                 * after the if used to give it an empty body, so both
+                 * writes ran unconditionally.
+                 * NOTE(review): assumes execute_cheat() returns non-zero when
+                 * the compared value matches -- confirm. */
+                if (execute_cheat(credits_address[0], 0x0320, NULL))
+                    execute_cheat(credits_address[1], 0x0000, NULL);
+                if (execute_cheat(credits_address[2], 0xF809, NULL))
+                    execute_cheat(credits_address[3], 0x0000, NULL);
+            }
}
}
#define SDL_SCANCODE_G SDLK_g
#define SDL_SCANCODE_RETURN SDLK_RETURN
#define SDL_SCANCODE_0 SDLK_0
+#define SDL_SCANCODE_1 SDLK_1
+#define SDL_SCANCODE_2 SDLK_2
+#define SDL_SCANCODE_3 SDLK_3
+#define SDL_SCANCODE_4 SDLK_4
+#define SDL_SCANCODE_5 SDLK_5
+#define SDL_SCANCODE_6 SDLK_6
+#define SDL_SCANCODE_7 SDLK_7
+#define SDL_SCANCODE_8 SDLK_8
#define SDL_SCANCODE_9 SDLK_9
#define SDL_SetEventFilter(func, data) SDL_SetEventFilter(func)
return 1;
}
+/*
+ * Translate a digit-key scancode into its save state slot number.
+ * Returns 0..9 for SDL_SCANCODE_0..SDL_SCANCODE_9 and -1 for any other key.
+ */
+static int get_saveslot_from_keysym(int keysym)
+{
+    /* digit-key scancodes, indexed by the slot each one selects */
+    static const int slot_keys[10] = {
+        SDL_SCANCODE_0, SDL_SCANCODE_1, SDL_SCANCODE_2, SDL_SCANCODE_3,
+        SDL_SCANCODE_4, SDL_SCANCODE_5, SDL_SCANCODE_6, SDL_SCANCODE_7,
+        SDL_SCANCODE_8, SDL_SCANCODE_9
+    };
+    int slot;
+
+    for (slot = 0; slot < 10; slot++) {
+        if (slot_keys[slot] == keysym)
+            return slot;
+    }
+
+    return -1;
+}
+
/*********************************************************************************************************
* sdl keyup/keydown handlers
*/
void event_sdl_keydown(int keysym, int keymod)
{
+ int slot;
+
/* check for the only 2 hard-coded key commands: Alt-enter for fullscreen and 0-9 for save state slot */
if (keysym == SDL_SCANCODE_RETURN && keymod & (KMOD_LALT | KMOD_RALT))
gfx.changeWindow();
- else if (keysym >= SDL_SCANCODE_0 && keysym <= SDL_SCANCODE_9)
- main_state_set_slot(keysym - SDL_SCANCODE_0);
+ else if ((slot = get_saveslot_from_keysym(keysym)) >= 0)
+ main_state_set_slot(slot);
/* check all of the configurable commands */
else if (keysym == ConfigGetParamInt(l_CoreEventsConfig, kbdStop))
main_stop();
ConfigSetDefaultString(g_CoreConfig, "SaveStatePath", "", "Path to directory where emulator save states (snapshots) are saved. If this is blank, the default value of ${UserConfigPath}/save will be used");
ConfigSetDefaultString(g_CoreConfig, "SaveSRAMPath", "", "Path to directory where SRAM/EEPROM data (in-game saves) are stored. If this is blank, the default value of ${UserConfigPath}/save will be used");
ConfigSetDefaultString(g_CoreConfig, "SharedDataPath", "", "Path to a directory to search when looking for shared data files");
+ ConfigSetDefaultBool(g_CoreConfig, "DelaySI", 0, "Delay interrupt after DMA SI read/write");
+ ConfigSetDefaultInt(g_CoreConfig, "CountPerOp", 2, "Force number of cycles per emulated instruction");
/* handle upgrades */
if (bUpgrade)
savestates_set_autoinc_slot(ConfigGetParamBool(g_CoreConfig, "AutoStateSlotIncrement"));
savestates_select_slot(ConfigGetParamInt(g_CoreConfig, "CurrentStateSlot"));
no_compiled_jump = ConfigGetParamBool(g_CoreConfig, "NoCompiledJump");
+ if (delay_si==-1) delay_si = ConfigGetParamBool(g_CoreConfig, "DelaySI");
+ if (count_per_op==-1) count_per_op = ConfigGetParamInt(g_CoreConfig, "CountPerOp");
+ if (count_per_op <= 0)
+ count_per_op = 2;
- // initialize memory, and do byte-swapping if it's not been done yet
+ // initialize memory, and do byte-swapping if it's not been done yet
if (g_MemHasBeenBSwapped == 0)
{
init_memory(1);
}
#endif
}
-
-/*********************************************************************************************************
-* main function
-*/
-int main(int argc, char *argv[])
-{
- return 1;
-}
-
extern m64p_frame_callback g_FrameCallback;
+extern int delay_si;
+
const char* get_savestatepath(void);
const char* get_savesrampath(void);
unsigned char isGoldeneyeRom = 0;
+/* NOTE(review): the definition added elsewhere in this change is
+ * `unsigned int count_per_op`, so this `int` declaration disagrees with it;
+ * the value is also compared against -1 and 0 -- confirm the intended type. */
+extern int count_per_op;
+
m64p_rom_header ROM_HEADER;
rom_params ROM_PARAMS;
m64p_rom_settings ROM_SETTINGS;
ROM_SETTINGS.status = entry->status;
ROM_SETTINGS.players = entry->players;
ROM_SETTINGS.rumble = entry->rumble;
+ delay_si = entry->delay_si;
+ count_per_op = entry->count_per_op;
}
else
{
ROM_SETTINGS.status = 0;
ROM_SETTINGS.players = 0;
ROM_SETTINGS.rumble = 0;
- }
+ delay_si = -1;
+ count_per_op = -1;
+ }
/* print out a bunch of info about the ROM */
DebugMessage(M64MSG_INFO, "Goodname: %s", ROM_SETTINGS.goodname);
DebugMessage(M64MSG_INFO, "Country: %s", buffer);
DebugMessage(M64MSG_VERBOSE, "PC = %x", sl((unsigned int)ROM_HEADER.PC));
DebugMessage(M64MSG_VERBOSE, "Save type: %d", ROM_SETTINGS.savetype);
+ if (delay_si>=0) DebugMessage(M64MSG_INFO, "Delay SI: %d", delay_si);
+ if (count_per_op>=0) DebugMessage(M64MSG_INFO, "Count Per OP: %d", count_per_op);
//Prepare Hack for GOLDENEYE
isGoldeneyeRom = 0;
search->entry.savetype = DEFAULT;
search->entry.players = DEFAULT;
search->entry.rumble = DEFAULT;
+ /*SEB*/
+ search->entry.delay_si=-1;
+ search->entry.count_per_op=-1;
search->next_entry = NULL;
search->next_crc = NULL;
else
DebugMessage(M64MSG_WARNING, "ROM Database: Invalid rumble string on line %i", lineno);
}
+ else if(!strcmp(l.name, "DelaySI"))
+ {
+ if(!strcmp(l.value, "True"))
+ search->entry.delay_si = 1;
+ else if(!strcmp(l.value, "False"))
+ search->entry.delay_si = 0;
+ else
+ DebugMessage(M64MSG_WARNING, "ROM Database: Invalid DelaySI string on line %i", lineno);
+ }
+ else if(!strcmp(l.name, "CountPerOp"))
+ {
+ if (string_to_int(l.value, &value) && value >= 0 && value < 8)
+ search->entry.count_per_op = value;
+ else
+ DebugMessage(M64MSG_WARNING, "ROM Database: Invalid CountPerOp on line %i", lineno);
+ }
else
{
DebugMessage(M64MSG_WARNING, "ROM Database: Unknown property on line %i", lineno);
unsigned char savetype;
unsigned char players; /* Local players 0-4, 2/3/4 way Netplay indicated by 5/6/7. */
unsigned char rumble; /* 0 - No, 1 - Yes boolean for rumble support. */
+ /*SEB*/
+ signed char delay_si; /* -1 = no value, 0 = off, 1 = on */
+ signed char count_per_op; /* -1 = no value, 0..3 = value */
} romdatabase_entry;
typedef struct _romdatabase_search
#include "main/util.h"
static unsigned char sram[0x8000];
+int delay_si = 0;
static char *get_sram_path(void)
{
update_pif_write();
update_count();
- add_interupt_event(SI_INT, /*0x100*/0x900);
+
+ if (delay_si) {
+ add_interupt_event(SI_INT, /*0x100*/0x900);
+ } else {
+ MI_register.mi_intr_reg |= 0x02; // SI
+ si_register.si_stat |= 0x1000; // INTERRUPT
+ check_interupt();
+ }
}
void dma_si_read(void)
}
update_count();
- add_interupt_event(SI_INT, /*0x100*/0x900);
+
+ if (delay_si) {
+ add_interupt_event(SI_INT, /*0x100*/0x900);
+ } else {
+ MI_register.mi_intr_reg |= 0x02; // SI
+ si_register.si_stat |= 0x1000; // INTERRUPT
+ check_interupt();
+ }
}
}
// calculate the proper response for the given challenge (X-Scale's algorithm)
n64_cic_nus_6105(challenge, response, CHL_LEN - 2);
+ PIF_RAMb[46] = 0;
+ PIF_RAMb[47] = 0;
// re-format the 'response' into a byte stream
for (i = 0; i < 15; i++)
{
case COMPARE_INT:
remove_interupt_event();
- Count+=2;
+ Count+=count_per_op;
add_interupt_event_count(COMPARE_INT, Compare);
- Count-=2;
+ Count-=count_per_op;
Cause = (Cause | 0x8000) & 0xFFFFFF83;
if ((Status & 7) != 1) return;
#define MAXBLOCK 4096
#define MAX_OUTPUT_BLOCK_SIZE 262144
-#define CLOCK_DIVIDER 2
+#define CLOCK_DIVIDER count_per_op
void *base_addr;
unsigned int r4300emu = 0;
int no_compiled_jump = 0;
+unsigned int count_per_op = 2;
int llbit, rompause;
#if NEW_DYNAREC != NEW_DYNAREC_ARM
int stop;
if (r4300emu != CORE_DYNAREC)
{
#endif
- Count = Count + (PC->addr - last_addr)/2;
+ Count += ((PC->addr - last_addr) >> 2) * count_per_op;
last_addr = PC->addr;
#ifdef NEW_DYNAREC
}
extern char invalid_code[0x100000];
extern unsigned int jump_to_address;
extern int no_compiled_jump;
+extern unsigned int count_per_op;
void init_blocks(void);
void free_blocks(void);
+/*
+ * Emit code that brings Count up to date for the block ending at `addr`.
+ *
+ * Fast path (neither COMPARE_CORE nor DBG defined): going by the emitter
+ * names, the generated sequence computes
+ *     Count += ((addr - last_addr) >> 2) * count_per_op
+ * Otherwise the generated code stores dst+1 into PC and calls update_count().
+ */
static void genupdate_count(unsigned int addr)
{
-#ifndef COMPARE_CORE
-#ifndef DBG
+#if !defined(COMPARE_CORE) && !defined(DBG)
mov_reg32_imm32(EAX, addr);
sub_reg32_m32(EAX, (unsigned int*)(&last_addr));
- shr_reg32_imm8(EAX, 1);
+ /* shift by 2, then scale by count_per_op loaded from memory at run time */
+ shr_reg32_imm8(EAX, 2);
+ mov_reg32_m32(EDX, &count_per_op);
+ /* NOTE(review): presumably the one-operand x86 MUL, which also overwrites
+  * EDX with the high half of the product -- confirm EDX is free here */
+ mul_reg32(EDX);
add_m32_reg32((unsigned int*)(&Count), EAX);
#else
mov_m32_imm32((unsigned int*)(&PC), (unsigned int)(dst+1));
mov_reg32_imm32(EAX, (unsigned int)update_count);
call_reg32(EAX);
#endif
-#else
- mov_m32_imm32((unsigned int*)(&PC), (unsigned int)(dst+1));
- mov_reg32_imm32(EAX, (unsigned int)update_count);
- call_reg32(EAX);
-#endif
}
static void gencheck_interupt(unsigned int instr_structure)
#if !defined(COMPARE_CORE) && !defined(DBG)
mov_reg32_imm32(EAX, addr);
sub_xreg32_m32rel(EAX, (unsigned int*)(&last_addr));
- shr_reg32_imm8(EAX, 1);
+ shr_reg32_imm8(EAX, 2);
+ mov_xreg32_m32rel(EDX, (void*)&count_per_op);
+ mul_reg32(EDX);
add_m32rel_xreg32((unsigned int*)(&Count), EAX);
#else
mov_reg64_imm64(RAX, (unsigned long long) (dst+1));
LDFLAGS += -Wl,-version-script,$(SRCDIR)/rsp_api_export.ver
endif
ifeq ($(OS), OSX)
- # Select the proper SDK
- # Also, SDKs are stored in a different location since XCode 4.3
- OSX_SDK ?= $(shell sw_vers -productVersion | cut -f1 -f2 -d .)
- OSX_XCODEMAJ = $(shell xcodebuild -version | grep '[0-9]*\.[0-9]*' | cut -f2 -d ' ' | cut -f1 -d .)
- OSX_XCODEMIN = $(shell xcodebuild -version | grep '[0-9]*\.[0-9]*' | cut -f2 -d ' ' | cut -f2 -d .)
- OSX_XCODEGE43 = $(shell echo "`expr $(OSX_XCODEMAJ) \>= 4``expr $(OSX_XCODEMIN) \>= 3`")
- ifeq ($(OSX_XCODEGE43), 11)
- OSX_SYSROOT := /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs
- else
- OSX_SYSROOT := /Developer/SDKs
- endif
+  # xcode-select has been around since XCode 3.0, i.e. OS X 10.5
+  # Simple (:=) assignment: run the shell commands once while the makefile is
+  # parsed instead of every time a variable that references them is expanded.
+  OSX_SDK_ROOT := $(shell xcode-select -print-path)/Platforms/MacOSX.platform/Developer/SDKs
+  # Use the last SDK directory that `ls` lists under OSX_SDK_ROOT.
+  OSX_SDK_PATH := $(OSX_SDK_ROOT)/$(shell ls $(OSX_SDK_ROOT) | tail -1)
ifeq ($(CPU), X86)
ifeq ($(ARCH_DETECTED), 64BITS)
- CFLAGS += -pipe -arch x86_64 -mmacosx-version-min=$(OSX_SDK) -isysroot $(OSX_SYSROOT)/MacOSX$(OSX_SDK).sdk
+ CFLAGS += -pipe -arch x86_64 -mmacosx-version-min=10.5 -isysroot $(OSX_SDK_PATH)
else
- CFLAGS += -pipe -mmmx -msse -fomit-frame-pointer -arch i686 -mmacosx-version-min=$(OSX_SDK) -isysroot $(OSX_SYSROOT)/MacOSX$(OSX_SDK).sdk
+ CFLAGS += -pipe -mmmx -msse -fomit-frame-pointer -arch i686 -mmacosx-version-min=10.5 -isysroot $(OSX_SDK_PATH)
LDFLAGS += -read_only_relocs suppress
endif
endif
rebuild: clean all
# build dependency files
-CFLAGS += -MD
+CFLAGS += -MD -MP
-include $(OBJECTS:.o=.d)
CXXFLAGS += $(CFLAGS)
#define SUBBLOCK_SIZE 64
typedef void (*tile_line_emitter_t)(const int16_t *y, const int16_t *u, uint32_t address);
-typedef void (*std_macroblock_decoder_t)(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE]);
+typedef void (*subblock_transform_t)(int16_t* dst, const int16_t* src);
/* rdram operations */
// FIXME: these functions deserve their own module
static void rdram_write_many_u32(const uint32_t *src, uint32_t address, unsigned int count);
/* standard jpeg ucode decoder */
-static void jpeg_decode_std(const char * const version, const std_macroblock_decoder_t decode_mb, const tile_line_emitter_t emit_line);
+static void jpeg_decode_std(const char * const version,
+ const subblock_transform_t transform_luma,
+ const subblock_transform_t transform_chroma,
+ const tile_line_emitter_t emit_line);
/* helper functions */
static uint8_t clamp_u8(int16_t x);
static void EmitRGBATileLine(const int16_t *y, const int16_t *u, uint32_t address);
/* macroblocks operations */
-static void DecodeMacroblock1(int16_t *macroblock, int32_t *y_dc, int32_t *u_dc, int32_t *v_dc, const int16_t *qtable);
-static void DecodeMacroblock2(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE]);
-static void DecodeMacroblock3(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE]);
+static void decode_macroblock_ob(int16_t *macroblock, int32_t *y_dc, int32_t *u_dc, int32_t *v_dc, const int16_t *qtable);
+static void decode_macroblock_std(
+ const subblock_transform_t transform_luma,
+ const subblock_transform_t transform_chroma,
+ int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE]);
static void EmitTilesMode0(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address);
static void EmitTilesMode2(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address);
**************************************************************************/
void jpeg_decode_PS0()
{
- jpeg_decode_std("PS0", DecodeMacroblock3, EmitYUVTileLine);
+ jpeg_decode_std("PS0", RescaleYSubBlock, RescaleUVSubBlock, EmitYUVTileLine);
}
/***************************************************************************
**************************************************************************/
void jpeg_decode_PS()
{
- jpeg_decode_std("PS", DecodeMacroblock2, EmitRGBATileLine);
+ jpeg_decode_std("PS", NULL, NULL, EmitRGBATileLine);
}
/***************************************************************************
int16_t macroblock[6*SUBBLOCK_SIZE];
rdram_read_many_u16((uint16_t*)macroblock, address, 6*SUBBLOCK_SIZE);
- DecodeMacroblock1(macroblock, &y_dc, &u_dc, &v_dc, (qscale != 0) ? qtable : NULL);
+ decode_macroblock_ob(macroblock, &y_dc, &u_dc, &v_dc, (qscale != 0) ? qtable : NULL);
EmitTilesMode2(EmitYUVTileLine, macroblock, address);
address += (2*6*SUBBLOCK_SIZE);
/* local functions */
-static void jpeg_decode_std(const char * const version, const std_macroblock_decoder_t decode_mb, const tile_line_emitter_t emit_line)
+static void jpeg_decode_std(const char * const version,
+ const subblock_transform_t transform_luma,
+ const subblock_transform_t transform_chroma,
+ const tile_line_emitter_t emit_line)
{
int16_t qtables[3][SUBBLOCK_SIZE];
unsigned int mb;
uint32_t qtableV_ptr;
unsigned int subblock_count;
unsigned int macroblock_size;
- int16_t *macroblock;
+ int16_t macroblock[6*SUBBLOCK_SIZE]; /* macroblock contains at most 6 subblocks */
const OSTask_t * const task = get_task();
if (task->flags & 0x1)
}
subblock_count = mode + 4;
- macroblock_size = 2*subblock_count*SUBBLOCK_SIZE;
+ macroblock_size = subblock_count*SUBBLOCK_SIZE;
rdram_read_many_u16((uint16_t*)qtables[0], qtableY_ptr, SUBBLOCK_SIZE);
rdram_read_many_u16((uint16_t*)qtables[1], qtableU_ptr, SUBBLOCK_SIZE);
rdram_read_many_u16((uint16_t*)qtables[2], qtableV_ptr, SUBBLOCK_SIZE);
- macroblock = malloc(sizeof(*macroblock) * macroblock_size);
- if (!macroblock)
- {
- DebugMessage(M64MSG_WARNING, "jpeg_decode_%s: could not allocate macroblock", version);
- return;
- }
-
for (mb = 0; mb < macroblock_count; ++mb)
{
- rdram_read_many_u16((uint16_t*)macroblock, address, macroblock_size >> 1);
- decode_mb(macroblock, subblock_count, (const int16_t (*)[SUBBLOCK_SIZE])qtables);
+ rdram_read_many_u16((uint16_t*)macroblock, address, macroblock_size);
+ decode_macroblock_std(transform_luma, transform_chroma,
+ macroblock, subblock_count, (const int16_t (*)[SUBBLOCK_SIZE])qtables);
if (mode == 0)
{
EmitTilesMode2(emit_line, macroblock, address);
}
- address += macroblock_size;
+ address += 2*macroblock_size;
}
- free(macroblock);
}
static uint8_t clamp_u8(int16_t x)
}
}
-static void DecodeMacroblock1(int16_t *macroblock, int32_t *y_dc, int32_t *u_dc, int32_t *v_dc, const int16_t *qtable)
+static void decode_macroblock_ob(int16_t *macroblock, int32_t *y_dc, int32_t *u_dc, int32_t *v_dc, const int16_t *qtable)
{
int sb;
}
}
-static void DecodeMacroblock2(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE])
-{
- unsigned int sb;
- unsigned int q = 0;
-
- for (sb = 0; sb < subblock_count; ++sb)
- {
- int16_t tmp_sb[SUBBLOCK_SIZE];
- const int isChromaSubBlock = (subblock_count - sb <= 2);
-
- if (isChromaSubBlock) { ++q; }
-
- MultSubBlocks(macroblock, macroblock, qtables[q], 4);
- ZigZagSubBlock(tmp_sb, macroblock);
- InverseDCTSubBlock(macroblock, tmp_sb);
-
- macroblock += SUBBLOCK_SIZE;
- }
-
-}
-
-static void DecodeMacroblock3(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE])
+static void decode_macroblock_std(
+ const subblock_transform_t transform_luma,
+ const subblock_transform_t transform_chroma,
+ int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE])
{
unsigned int sb;
unsigned int q = 0;
if (isChromaSubBlock)
{
- RescaleUVSubBlock(macroblock, macroblock);
+ if (transform_chroma != NULL)
+ transform_chroma(macroblock, macroblock);
}
else
{
- RescaleYSubBlock(macroblock, macroblock);
+ if (transform_luma != NULL)
+ transform_luma(macroblock, macroblock);
}
macroblock += SUBBLOCK_SIZE;
case 0x2caa6: jpeg_decode_PS(); return;
/* JPEG: found in Ogre Battle, Bottom of the 9th */
- case 0x130de: jpeg_decode_OB(); return;
+ case 0x130de:
+ case 0x278b0:
+ jpeg_decode_OB(); return;
}
handle_unknown_task(sum);
--- /dev/null
+Mupen64Plus-Video-Rice INSTALL
+------------------------------
+
+This text file was written to explain the installation process of the
+Mupen64Plus-Video-Rice module.
+
+If this module is part of a Mupen64Plus source code bundle, the user should run
+the "m64p_install.sh" script in the root of the unzipped bundle to install all
+of the included modules in the bundle.
+
+If this module is a standalone source code release, you should build the library
+from source code and install it via the makefile, like this:
+
+$ cd projects/unix
+$ make all
+$ sudo make install
+
+If you want to build the Mupen64Plus-Video-Rice module for installation in a
+home folder for a single user, you may build it like this (replacing
+<my-folder> with your desired local installation path):
+
+$ cd projects/unix
+$ make all
+$ make install LIBDIR=<my-folder> SHAREDIR=<my-folder>
+
+
endif
# base CFLAGS, LDLIBS, and LDFLAGS
-OPTFLAGS ?= -O3 -flto -fuse-linker-plugin
+OPTFLAGS ?= -O3
+# LTO flags removed from the default OPTFLAGS: -flto -fuse-linker-plugin
WARNFLAGS ?= -Wall
CFLAGS += $(OPTFLAGS) $(WARNFLAGS) -ffast-math -fno-strict-aliasing -fvisibility=hidden -I../../src
CXXFLAGS += -fvisibility-inlines-hidden