OBJS += $(LCHDR)/src/libchdr_chd.o
OBJS += $(LCHDR)/src/libchdr_flac.o
OBJS += $(LCHDR)/src/libchdr_huffman.o
-$(LCHDR)/src/%.o: CFLAGS += -Wno-unused -Wno-maybe-uninitialized -std=gnu11
+$(LCHDR)/src/%.o: CFLAGS += -Wno-unused -Wno-maybe-uninitialized -Wno-format -std=gnu11
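+# the extra -Wno-format only silences printf-format warnings coming from the vendored libchdr sources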
OBJS += $(LCHDR_LZMA)/src/Alloc.o
OBJS += $(LCHDR_LZMA)/src/CpuArch.o
OBJS += $(LCHDR_LZMA)/src/Delta.o
SysPrintf("%d cpu core(s) detected\n", cpu_features_get_core_amount());
#ifdef _3DS
int64_t version = 0;
+ int fpscr = -1;
+
APT_CheckNew3DS(&is_new_3ds);
svcGetSystemInfo(&version, 0x10000, 0);
u32 percent = -1;
APT_GetAppCpuTimeLimit(&percent);
- SysPrintf("%s3ds detected, v%d.%d, AppCpuTimeLimit=%ld\n",
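+ /* read the VFP status/control register (FPSCR) so its value appears in the log line below */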
+ __asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr));
+ SysPrintf("%s3ds detected, v%d.%d, AppCpuTimeLimit=%ld fpscr=%08x\n",
is_new_3ds ? "new" : "old", (int)GET_VERSION_MAJOR(version),
- (int)GET_VERSION_MINOR(version), percent);
+ (int)GET_VERSION_MINOR(version), percent, fpscr);
#endif
}
///////////////////////////////////////////////////////////////////////////
// --- BEGIN INVERSE APPROXIMATION SECTION ---
///////////////////////////////////////////////////////////////////////////
-#if defined(GPU_UNAI_USE_INT_DIV_MULTINV) || !defined(GPU_UNAI_NO_OLD)
+#if defined(GPU_UNAI_USE_INT_DIV_MULTINV) || (!defined(GPU_UNAI_NO_OLD) && !defined(GPU_UNAI_USE_FLOATMATH))
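+// only the integer multiply-by-inverse path (and the old renderer built without float math) needs this table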
// big precision inverse table.
#define TABLE_BITS 16
x3 = x4 = i2x(x0);
if (dx < 0) {
#ifdef GPU_UNAI_USE_FLOATMATH
-#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV_FOR_ONE
dx3 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
#else
#endif
} else {
#ifdef GPU_UNAI_USE_FLOATMATH
-#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV_FOR_ONE
dx3 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
#else
x3 = i2x(x0) + (dx3 * (y1 - y0));
x4 = i2x(x1);
#ifdef GPU_UNAI_USE_FLOATMATH
-#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV_FOR_ONE
dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
#else
dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
x3 = i2x(x1);
x4 = i2x(x0) + (dx4 * (y1 - y0));
#ifdef GPU_UNAI_USE_FLOATMATH
-#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV_FOR_ONE
dx3 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
#else
dx3 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
v3 += (dv3 * (y1 - y0));
}
#ifdef GPU_UNAI_USE_FLOATMATH
-#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV_FOR_ONE
dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
#else
dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
}
#ifdef GPU_UNAI_USE_FLOATMATH
-#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV_FOR_ONE
dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
#else
dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
}
#ifdef GPU_UNAI_USE_FLOATMATH
-#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV_FOR_ONE
dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
#else
dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
//gpu_unai.config.enableAbbeyHack = gpu_unai_config_ext.abe_hack;
gpu_unai.ilace_mask = gpu_unai.config.ilace_force;
-#if defined(GPU_UNAI_USE_INT_DIV_MULTINV) || !defined(GPU_UNAI_NO_OLD)
+#if defined(GPU_UNAI_USE_INT_DIV_MULTINV) || (!defined(GPU_UNAI_NO_OLD) && !defined(GPU_UNAI_USE_FLOATMATH))
// s_invTable
for(int i=1;i<=(1<<TABLE_BITS);++i)
{
#define fixed_TWO ((fixed)2<<FIXED_BITS)
#define fixed_HALF ((fixed)((1<<FIXED_BITS)>>1))
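+// i.e. 2.0 and 0.5 expressed in the FIXED_BITS fixed-point format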
-// big precision inverse table.
-extern s32 s_invTable[(1<<TABLE_BITS)];
-
INLINE fixed i2x(const int _x) { return ((_x)<<FIXED_BITS); }
INLINE fixed x2i(const fixed _x) { return ((_x)>>FIXED_BITS); }
}
*/
+#ifdef GPU_UNAI_USE_FLOATMATH
+
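+// inv_type is what callers declare for the divisor state (inv_type iF,iS;) before calling xInv()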
+#define inv_type float
+
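+// Float counterpart of the integer xInv(): compute 1.0f/_b once so several
+// xInvMulx() calls can reuse the same reciprocal.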
+INLINE void xInv (const fixed _b, float& factor_, float& shift_)
+{
+ factor_ = 1.0f / _b;
+ shift_ = 0.0f; // unused in the float path; kept so call sites match the integer version
+}
+
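+// approximates (_a << FIXED_BITS) / _b as a multiply by the reciprocal prepared in xInv()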
+INLINE fixed xInvMulx (const fixed _a, const float fact, const float shift)
+{
+ return (fixed)((_a << FIXED_BITS) * fact);
+}
+
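+// one-shot fixed-point divide done with plain float division; no precomputed inverse involved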
+INLINE fixed xLoDivx (const fixed _a, const fixed _b)
+{
+ return (fixed)((_a << FIXED_BITS) / (float)_b);
+}
+
+#else
+
+#define inv_type s32
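+// integer path: xInv() hands back a reciprocal factor and a shift amount instead of a single float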
+
#ifdef HAVE_ARMV5
INLINE u32 Log2(u32 x) { u32 res; asm("clz %0,%1" : "=r" (res) : "r" (x)); return 32-res; }
#else
INLINE u32 Log2(u32 x) { u32 i = 0; for ( ; x > 0; ++i, x >>= 1); return i - 1; }
#endif
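+// note: for non-zero x the clz variant returns floor(log2(x)) + 1, one more than the generic fallback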
+// big precision inverse table.
+extern s32 s_invTable[(1<<TABLE_BITS)];
+
#ifdef GPU_TABLE_10_BITS
INLINE void xInv (const fixed _b, s32& iFactor_, s32& iShift_)
{
return xInvMulx(_a, iFact, iShift);
}
+#endif // GPU_UNAI_USE_FLOATMATH
+
///////////////////////////////////////////////////////////////////////////
template<typename T>
INLINE T Min2 (const T _a, const T _b) { return (_a<_b)?_a:_b; }
du4 = (u2 - u1) * ya - (u2 - u0) * yb;
dv4 = (v2 - v1) * ya - (v2 - v0) * yb;
- s32 iF,iS;
+ inv_type iF,iS;
xInv( dx, iF, iS);
du4 = xInvMulx( du4, iF, iS);
dv4 = xInvMulx( dv4, iF, iS);
dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
db4 = (b2 - b1) * ya - (b2 - b0) * yb;
- s32 iF,iS;
+ inv_type iF,iS;
xInv( dx, iF, iS);
dr4 = xInvMulx( dr4, iF, iS);
dg4 = xInvMulx( dg4, iF, iS);
dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
db4 = (b2 - b1) * ya - (b2 - b0) * yb;
- s32 iF,iS;
+ inv_type iF,iS;
xInv( dx, iF, iS);
du4 = xInvMulx( du4, iF, iS);