gpu_unai: minor tuning for 3ds

author notaz <notasas@gmail.com>

Mon, 28 Oct 2024 00:27:39 +0000 (02:27 +0200)

committer notaz <notasas@gmail.com>

Fri, 1 Nov 2024 00:52:10 +0000 (02:52 +0200)
author notaz <notasas@gmail.com>
Mon, 28 Oct 2024 00:27:39 +0000 (02:27 +0200)
committer notaz <notasas@gmail.com>
Fri, 1 Nov 2024 00:52:10 +0000 (02:52 +0200)
diff --git a/Makefile b/Makefile

index 39b5fba..5d88506 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -288,7 +288,7 @@ OBJS += $(LCHDR)/src/libchdr_cdrom.o
  OBJS += $(LCHDR)/src/libchdr_chd.o
  OBJS += $(LCHDR)/src/libchdr_flac.o
  OBJS += $(LCHDR)/src/libchdr_huffman.o
-$(LCHDR)/src/%.o: CFLAGS += -Wno-unused -Wno-maybe-uninitialized -std=gnu11
+$(LCHDR)/src/%.o: CFLAGS += -Wno-unused -Wno-maybe-uninitialized -Wno-format -std=gnu11
  OBJS += $(LCHDR_LZMA)/src/Alloc.o
  OBJS += $(LCHDR_LZMA)/src/CpuArch.o
  OBJS += $(LCHDR_LZMA)/src/Delta.o
diff --git a/frontend/libretro-rthreads.c b/frontend/libretro-rthreads.c

index 82af3ef..90067b1 100644 (file)
--- a/frontend/libretro-rthreads.c
+++ b/frontend/libretro-rthreads.c
@@ -24,6 +24,8 @@ void pcsxr_sthread_init(void)
         SysPrintf("%d cpu core(s) detected\n", cpu_features_get_core_amount());
  #ifdef _3DS
         int64_t version = 0;
+       int fpscr = -1;
+
         APT_CheckNew3DS(&is_new_3ds);
         svcGetSystemInfo(&version, 0x10000, 0);
  
@@ -31,9 +33,10 @@ void pcsxr_sthread_init(void)
         u32 percent = -1;
         APT_GetAppCpuTimeLimit(&percent);
  
-       SysPrintf("%s3ds detected, v%d.%d, AppCpuTimeLimit=%ld\n",
+       __asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr));
+       SysPrintf("%s3ds detected, v%d.%d, AppCpuTimeLimit=%ld fpscr=%08x\n",
                 is_new_3ds ? "new" : "old", (int)GET_VERSION_MAJOR(version),
-               (int)GET_VERSION_MINOR(version), percent);
+               (int)GET_VERSION_MINOR(version), percent, fpscr);
  #endif
  }
  
diff --git a/plugins/gpu_unai/gpu_fixedpoint.h b/plugins/gpu_unai/gpu_fixedpoint.h

index f809905..364321b 100644 (file)
--- a/plugins/gpu_unai/gpu_fixedpoint.h
+++ b/plugins/gpu_unai/gpu_fixedpoint.h
@@ -75,7 +75,7 @@ INLINE float FloatInv(const float x)
  ///////////////////////////////////////////////////////////////////////////
  // --- BEGIN INVERSE APPROXIMATION SECTION ---
  ///////////////////////////////////////////////////////////////////////////
-#if defined(GPU_UNAI_USE_INT_DIV_MULTINV) || !defined(GPU_UNAI_NO_OLD)
+#if defined(GPU_UNAI_USE_INT_DIV_MULTINV) || (!defined(GPU_UNAI_NO_OLD) && !defined(GPU_UNAI_USE_FLOATMATH))
  
  //  big precision inverse table.
  #define TABLE_BITS 16
diff --git a/plugins/gpu_unai/gpu_raster_polygon.h b/plugins/gpu_unai/gpu_raster_polygon.h

index ff6dc00..1b9e08d 100644 (file)
--- a/plugins/gpu_unai/gpu_raster_polygon.h
+++ b/plugins/gpu_unai/gpu_raster_polygon.h
@@ -257,7 +257,7 @@ void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad
                                 x3 = x4 = i2x(x0);
                                 if (dx < 0) {
  #ifdef GPU_UNAI_USE_FLOATMATH
-#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV_FOR_ONE
                                         dx3 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
                                         dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
  #else
@@ -275,7 +275,7 @@ void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad
  #endif
                                 } else {
  #ifdef GPU_UNAI_USE_FLOATMATH
-#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV_FOR_ONE
                                         dx3 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0;
                                         dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0;
  #else
@@ -303,7 +303,7 @@ void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad
                                         x3 = i2x(x0) + (dx3 * (y1 - y0));
                                         x4 = i2x(x1);
  #ifdef GPU_UNAI_USE_FLOATMATH
-#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV_FOR_ONE
                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
  #else
                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
@@ -319,7 +319,7 @@ void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad
                                         x3 = i2x(x1);
                                         x4 = i2x(x0) + (dx4 * (y1 - y0));
  #ifdef GPU_UNAI_USE_FLOATMATH
-#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV_FOR_ONE
                                         dx3 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
  #else
                                         dx3 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
@@ -581,7 +581,7 @@ void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_qua
                                                 v3 += (dv3 * (y1 - y0));
                                         }
  #ifdef GPU_UNAI_USE_FLOATMATH
-#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV_FOR_ONE
                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
  #else
                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
@@ -920,7 +920,7 @@ void gpuDrawPolyG(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad
                                         }
  
  #ifdef GPU_UNAI_USE_FLOATMATH
-#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV_FOR_ONE
                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
  #else
                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
@@ -1305,7 +1305,7 @@ void gpuDrawPolyGT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_qua
                                         }
  
  #ifdef GPU_UNAI_USE_FLOATMATH
-#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV
+#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV_FOR_ONE
                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0;
  #else
                                         dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0;
diff --git a/plugins/gpu_unai/gpulib_if.cpp b/plugins/gpu_unai/gpulib_if.cpp

index be6b6c9..47289a3 100644 (file)
--- a/plugins/gpu_unai/gpulib_if.cpp
+++ b/plugins/gpu_unai/gpulib_if.cpp
@@ -254,7 +254,7 @@ int renderer_init(void)
    //gpu_unai.config.enableAbbeyHack = gpu_unai_config_ext.abe_hack;
    gpu_unai.ilace_mask = gpu_unai.config.ilace_force;
  
-#if defined(GPU_UNAI_USE_INT_DIV_MULTINV) || !defined(GPU_UNAI_NO_OLD)
+#if defined(GPU_UNAI_USE_INT_DIV_MULTINV) || (!defined(GPU_UNAI_NO_OLD) && !defined(GPU_UNAI_USE_FLOATMATH))
    // s_invTable
    for(int i=1;i<=(1<<TABLE_BITS);++i)
    {
diff --git a/plugins/gpu_unai/old/gpu_fixedpoint.h b/plugins/gpu_unai/old/gpu_fixedpoint.h

index 5dae806..4ae3ed0 100644 (file)
--- a/plugins/gpu_unai/old/gpu_fixedpoint.h
+++ b/plugins/gpu_unai/old/gpu_fixedpoint.h
@@ -38,9 +38,6 @@ typedef s32 fixed;
  #define fixed_TWO  ((fixed)2<<FIXED_BITS)
  #define fixed_HALF ((fixed)((1<<FIXED_BITS)>>1))
  
-//  big precision inverse table.
-extern s32 s_invTable[(1<<TABLE_BITS)];
-
  INLINE  fixed i2x(const int   _x) { return  ((_x)<<FIXED_BITS); }
  INLINE  fixed x2i(const fixed _x) { return  ((_x)>>FIXED_BITS); }
  
@@ -57,12 +54,39 @@ INLINE u32 Log2(u32 _a)
  }
  */
  
+#ifdef GPU_UNAI_USE_FLOATMATH
+
+#define inv_type float
+
+INLINE  void  xInv (const fixed _b, float & factor_, float & shift_)
+{
+       factor_ = 1.0f / _b;
+       shift_ = 0.0f; // not used
+}
+
+INLINE  fixed xInvMulx  (const fixed _a, const float fact, const float shift)
+{
+       return (fixed)((_a << FIXED_BITS) * fact);
+}
+
+INLINE  fixed xLoDivx   (const fixed _a, const fixed _b)
+{
+       return (fixed)((_a << FIXED_BITS) / (float)_b);
+}
+
+#else
+
+#define inv_type s32
+
  #ifdef HAVE_ARMV5
  INLINE u32 Log2(u32 x) { u32 res; asm("clz %0,%1" : "=r" (res) : "r" (x)); return 32-res; }
  #else
  INLINE u32 Log2(u32 x) { u32 i = 0; for ( ; x > 0; ++i, x >>= 1); return i - 1; }
  #endif
  
+//  big precision inverse table.
+extern s32 s_invTable[(1<<TABLE_BITS)];
+
  #ifdef GPU_TABLE_10_BITS
  INLINE  void  xInv (const fixed _b, s32& iFactor_, s32& iShift_)
  {
@@ -113,6 +137,8 @@ INLINE  fixed xLoDivx   (const fixed _a, const fixed _b)
    return xInvMulx(_a, iFact, iShift);
  }
  
+#endif // GPU_UNAI_USE_FLOATMATH
+
  ///////////////////////////////////////////////////////////////////////////
  template<typename T>
  INLINE  T Min2 (const T _a, const T _b)             { return (_a<_b)?_a:_b; }
diff --git a/plugins/gpu_unai/old/gpu_raster_polygon.h b/plugins/gpu_unai/old/gpu_raster_polygon.h

index c4b0350..fcd1f6c 100644 (file)
--- a/plugins/gpu_unai/old/gpu_raster_polygon.h
+++ b/plugins/gpu_unai/old/gpu_raster_polygon.h
@@ -245,7 +245,7 @@ void gpuDrawFT3(const PP gpuPolySpanDriver)
         du4 = (u2 - u1) * ya - (u2 - u0) * yb;
         dv4 = (v2 - v1) * ya - (v2 - v0) * yb;
  
-       s32 iF,iS;
+       inv_type iF,iS;
         xInv( dx, iF, iS);
         du4 = xInvMulx( du4, iF, iS);
         dv4 = xInvMulx( dv4, iF, iS);
@@ -425,7 +425,7 @@ void gpuDrawG3(const PP gpuPolySpanDriver)
         dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
         db4 = (b2 - b1) * ya - (b2 - b0) * yb;
  
-       s32 iF,iS;
+       inv_type iF,iS;
         xInv(            dx, iF, iS);
         dr4 = xInvMulx( dr4, iF, iS);
         dg4 = xInvMulx( dg4, iF, iS);
@@ -619,7 +619,7 @@ void gpuDrawGT3(const PP gpuPolySpanDriver)
         dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
         db4 = (b2 - b1) * ya - (b2 - b0) * yb;
  
-       s32 iF,iS;
+       inv_type iF,iS;
  
         xInv(            dx, iF, iS);
         du4 = xInvMulx( du4, iF, iS);
author	notaz <notasas@gmail.com>
	Mon, 28 Oct 2024 00:27:39 +0000 (02:27 +0200)
committer	notaz <notasas@gmail.com>
	Fri, 1 Nov 2024 00:52:10 +0000 (02:52 +0200)
Makefile		patch \| blob \| blame \| history
frontend/libretro-rthreads.c		patch \| blob \| blame \| history
plugins/gpu_unai/gpu_fixedpoint.h		patch \| blob \| blame \| history
plugins/gpu_unai/gpu_raster_polygon.h		patch \| blob \| blame \| history
plugins/gpu_unai/gpulib_if.cpp		patch \| blob \| blame \| history
plugins/gpu_unai/old/gpu_fixedpoint.h		patch \| blob \| blame \| history
plugins/gpu_unai/old/gpu_raster_polygon.h		patch \| blob \| blame \| history