From f0d65622dedaa452e6e6ea1046932068e8be5c0f Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 28 Oct 2024 02:27:39 +0200 Subject: [PATCH] gpu_unai: minor tuning for 3ds not much of a difference, but at least avoids s_invTable --- Makefile | 2 +- frontend/libretro-rthreads.c | 7 +++-- plugins/gpu_unai/gpu_fixedpoint.h | 2 +- plugins/gpu_unai/gpu_raster_polygon.h | 14 +++++----- plugins/gpu_unai/gpulib_if.cpp | 2 +- plugins/gpu_unai/old/gpu_fixedpoint.h | 32 ++++++++++++++++++++--- plugins/gpu_unai/old/gpu_raster_polygon.h | 6 ++--- 7 files changed, 47 insertions(+), 18 deletions(-) diff --git a/Makefile b/Makefile index 39b5fbaf..5d885066 100644 --- a/Makefile +++ b/Makefile @@ -288,7 +288,7 @@ OBJS += $(LCHDR)/src/libchdr_cdrom.o OBJS += $(LCHDR)/src/libchdr_chd.o OBJS += $(LCHDR)/src/libchdr_flac.o OBJS += $(LCHDR)/src/libchdr_huffman.o -$(LCHDR)/src/%.o: CFLAGS += -Wno-unused -Wno-maybe-uninitialized -std=gnu11 +$(LCHDR)/src/%.o: CFLAGS += -Wno-unused -Wno-maybe-uninitialized -Wno-format -std=gnu11 OBJS += $(LCHDR_LZMA)/src/Alloc.o OBJS += $(LCHDR_LZMA)/src/CpuArch.o OBJS += $(LCHDR_LZMA)/src/Delta.o diff --git a/frontend/libretro-rthreads.c b/frontend/libretro-rthreads.c index 82af3ef2..90067b13 100644 --- a/frontend/libretro-rthreads.c +++ b/frontend/libretro-rthreads.c @@ -24,6 +24,8 @@ void pcsxr_sthread_init(void) SysPrintf("%d cpu core(s) detected\n", cpu_features_get_core_amount()); #ifdef _3DS int64_t version = 0; + int fpscr = -1; + APT_CheckNew3DS(&is_new_3ds); svcGetSystemInfo(&version, 0x10000, 0); @@ -31,9 +33,10 @@ void pcsxr_sthread_init(void) u32 percent = -1; APT_GetAppCpuTimeLimit(&percent); - SysPrintf("%s3ds detected, v%d.%d, AppCpuTimeLimit=%ld\n", + __asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr)); + SysPrintf("%s3ds detected, v%d.%d, AppCpuTimeLimit=%ld fpscr=%08x\n", is_new_3ds ? "new" : "old", (int)GET_VERSION_MAJOR(version), - (int)GET_VERSION_MINOR(version), percent); + (int)GET_VERSION_MINOR(version), percent, fpscr); #endif } diff --git a/plugins/gpu_unai/gpu_fixedpoint.h b/plugins/gpu_unai/gpu_fixedpoint.h index f809905e..364321b7 100644 --- a/plugins/gpu_unai/gpu_fixedpoint.h +++ b/plugins/gpu_unai/gpu_fixedpoint.h @@ -75,7 +75,7 @@ INLINE float FloatInv(const float x) /////////////////////////////////////////////////////////////////////////// // --- BEGIN INVERSE APPROXIMATION SECTION --- /////////////////////////////////////////////////////////////////////////// -#if defined(GPU_UNAI_USE_INT_DIV_MULTINV) || !defined(GPU_UNAI_NO_OLD) +#if defined(GPU_UNAI_USE_INT_DIV_MULTINV) || (!defined(GPU_UNAI_NO_OLD) && !defined(GPU_UNAI_USE_FLOATMATH)) // big precision inverse table. #define TABLE_BITS 16 diff --git a/plugins/gpu_unai/gpu_raster_polygon.h b/plugins/gpu_unai/gpu_raster_polygon.h index ff6dc00d..1b9e08dc 100644 --- a/plugins/gpu_unai/gpu_raster_polygon.h +++ b/plugins/gpu_unai/gpu_raster_polygon.h @@ -257,7 +257,7 @@ void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad x3 = x4 = i2x(x0); if (dx < 0) { #ifdef GPU_UNAI_USE_FLOATMATH -#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV_FOR_ONE dx3 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0; dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0; #else @@ -275,7 +275,7 @@ void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad #endif } else { #ifdef GPU_UNAI_USE_FLOATMATH -#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV_FOR_ONE dx3 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0; dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0; #else @@ -303,7 +303,7 @@ void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad x3 = i2x(x0) + (dx3 * (y1 - y0)); x4 = i2x(x1); #ifdef GPU_UNAI_USE_FLOATMATH -#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV_FOR_ONE dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0; #else dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0; @@ -319,7 +319,7 @@ void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad x3 = i2x(x1); x4 = i2x(x0) + (dx4 * (y1 - y0)); #ifdef GPU_UNAI_USE_FLOATMATH -#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV_FOR_ONE dx3 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0; #else dx3 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0; @@ -581,7 +581,7 @@ void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_qua v3 += (dv3 * (y1 - y0)); } #ifdef GPU_UNAI_USE_FLOATMATH -#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV_FOR_ONE dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0; #else dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0; @@ -920,7 +920,7 @@ void gpuDrawPolyG(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad } #ifdef GPU_UNAI_USE_FLOATMATH -#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV_FOR_ONE dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0; #else dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0; @@ -1305,7 +1305,7 @@ void gpuDrawPolyGT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_qua } #ifdef GPU_UNAI_USE_FLOATMATH -#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV_FOR_ONE dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0; #else dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0; diff --git a/plugins/gpu_unai/gpulib_if.cpp b/plugins/gpu_unai/gpulib_if.cpp index be6b6c9e..47289a3a 100644 --- a/plugins/gpu_unai/gpulib_if.cpp +++ b/plugins/gpu_unai/gpulib_if.cpp @@ -254,7 +254,7 @@ int renderer_init(void) //gpu_unai.config.enableAbbeyHack = gpu_unai_config_ext.abe_hack; gpu_unai.ilace_mask = gpu_unai.config.ilace_force; -#if defined(GPU_UNAI_USE_INT_DIV_MULTINV) || !defined(GPU_UNAI_NO_OLD) +#if defined(GPU_UNAI_USE_INT_DIV_MULTINV) || (!defined(GPU_UNAI_NO_OLD) && !defined(GPU_UNAI_USE_FLOATMATH)) // s_invTable for(int i=1;i<=(1<>1)) -// big precision inverse table. -extern s32 s_invTable[(1<>FIXED_BITS); } @@ -57,12 +54,39 @@ INLINE u32 Log2(u32 _a) } */ +#ifdef GPU_UNAI_USE_FLOATMATH + +#define inv_type float + +INLINE void xInv (const fixed _b, float & factor_, float & shift_) +{ + factor_ = 1.0f / _b; + shift_ = 0.0f; // not used +} + +INLINE fixed xInvMulx (const fixed _a, const float fact, const float shift) +{ + return (fixed)((_a << FIXED_BITS) * fact); +} + +INLINE fixed xLoDivx (const fixed _a, const fixed _b) +{ + return (fixed)((_a << FIXED_BITS) / (float)_b); +} + +#else + +#define inv_type s32 + #ifdef HAVE_ARMV5 INLINE u32 Log2(u32 x) { u32 res; asm("clz %0,%1" : "=r" (res) : "r" (x)); return 32-res; } #else INLINE u32 Log2(u32 x) { u32 i = 0; for ( ; x > 0; ++i, x >>= 1); return i - 1; } #endif +// big precision inverse table. +extern s32 s_invTable[(1< INLINE T Min2 (const T _a, const T _b) { return (_a<_b)?_a:_b; } diff --git a/plugins/gpu_unai/old/gpu_raster_polygon.h b/plugins/gpu_unai/old/gpu_raster_polygon.h index c4b03509..fcd1f6ce 100644 --- a/plugins/gpu_unai/old/gpu_raster_polygon.h +++ b/plugins/gpu_unai/old/gpu_raster_polygon.h @@ -245,7 +245,7 @@ void gpuDrawFT3(const PP gpuPolySpanDriver) du4 = (u2 - u1) * ya - (u2 - u0) * yb; dv4 = (v2 - v1) * ya - (v2 - v0) * yb; - s32 iF,iS; + inv_type iF,iS; xInv( dx, iF, iS); du4 = xInvMulx( du4, iF, iS); dv4 = xInvMulx( dv4, iF, iS); @@ -425,7 +425,7 @@ void gpuDrawG3(const PP gpuPolySpanDriver) dg4 = (g2 - g1) * ya - (g2 - g0) * yb; db4 = (b2 - b1) * ya - (b2 - b0) * yb; - s32 iF,iS; + inv_type iF,iS; xInv( dx, iF, iS); dr4 = xInvMulx( dr4, iF, iS); dg4 = xInvMulx( dg4, iF, iS); @@ -619,7 +619,7 @@ void gpuDrawGT3(const PP gpuPolySpanDriver) dg4 = (g2 - g1) * ya - (g2 - g0) * yb; db4 = (b2 - b1) * ya - (b2 - b0) * yb; - s32 iF,iS; + inv_type iF,iS; xInv( dx, iF, iS); du4 = xInvMulx( du4, iF, iS); -- 2.39.5