gpu_neon: adjust some comments and things

author notaz <notasas@gmail.com>

Tue, 16 Aug 2022 21:11:39 +0000 (00:11 +0300)

committer notaz <notasas@gmail.com>

Thu, 15 Sep 2022 17:19:06 +0000 (20:19 +0300)
author notaz <notasas@gmail.com>
Tue, 16 Aug 2022 21:11:39 +0000 (00:11 +0300)
committer notaz <notasas@gmail.com>
Thu, 15 Sep 2022 17:19:06 +0000 (20:19 +0300)
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c

index 1d513d8..51ad152 100644 (file)
--- a/plugins/gpu_neon/psx_gpu/psx_gpu.c
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c
@@ -22,6 +22,13 @@
  #endif
  #include "psx_gpu_simd.h"
  
  #endif
  #include "psx_gpu_simd.h"
  
+#if 0
+void dump_r_d(const char *name, void *dump);
+void dump_r_q(const char *name, void *dump);
+#define dumprd(n) dump_r_d(#n, n.e)
+#define dumprq(n) dump_r_q(#n, n.e)
+#endif
+
  u32 span_pixels = 0;
  u32 span_pixel_blocks = 0;
  u32 spans = 0;
  u32 span_pixels = 0;
  u32 span_pixel_blocks = 0;
  u32 spans = 0;
@@ -769,13 +776,13 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a,
  {                                                                              \
    u32 _num_spans = &span_edge_data_element - psx_gpu->span_edge_data;          \
    if (_num_spans > MAX_SPANS)                                                  \
  {                                                                              \
    u32 _num_spans = &span_edge_data_element - psx_gpu->span_edge_data;          \
    if (_num_spans > MAX_SPANS)                                                  \
-    *(int *)0 = 1;                                                             \
+    *(volatile int *)0 = 1;                                                    \
    if (_num_spans < psx_gpu->num_spans)                                         \
    {                                                                            \
      if(span_edge_data_element.num_blocks > MAX_BLOCKS_PER_ROW)                 \
    if (_num_spans < psx_gpu->num_spans)                                         \
    {                                                                            \
      if(span_edge_data_element.num_blocks > MAX_BLOCKS_PER_ROW)                 \
-      *(int *)0 = 1;                                                           \
-    if(span_edge_data_element.y > 2048)                                        \
-      *(int *)0 = 1;                                                           \
+      *(volatile int *)0 = 2;                                                  \
+    if(span_edge_data_element.y >= 2048)                                       \
+      *(volatile int *)0 = 3;                                                  \
    }                                                                            \
  }                                                                              \
  
    }                                                                            \
  }                                                                              \
  
@@ -788,7 +795,7 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a,
    vec_2x64s alternate_x;                                                       \
    vec_2x64s alternate_dx_dy;                                                   \
    vec_4x32s alternate_x_32;                                                    \
    vec_2x64s alternate_x;                                                       \
    vec_2x64s alternate_dx_dy;                                                   \
    vec_4x32s alternate_x_32;                                                    \
-  vec_2x32s alternate_x_16;                                                    \
+  vec_4x16u alternate_x_16;                                                    \
                                                                                 \
    vec_4x16u alternate_select;                                                  \
    vec_4x16s y_mid_point;                                                       \
                                                                                 \
    vec_4x16u alternate_select;                                                  \
    vec_4x16s y_mid_point;                                                       \
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S

index da47756..c62c1ba 100644 (file)
--- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
@@ -369,8 +369,8 @@ function(compute_all_gradients)
    sub r14, r14, #(62 - 12)           @ r14 = shift - (62 - FIXED_BITS)
  
    vshll.u16 uvrg_base, uvrg0, #16    @ uvrg_base = uvrg0 << 16
    sub r14, r14, #(62 - 12)           @ r14 = shift - (62 - FIXED_BITS)
  
    vshll.u16 uvrg_base, uvrg0, #16    @ uvrg_base = uvrg0 << 16
-  vdup.u32 r_shift, r14              @ r_shift = { shift, shift, shift, shift }
-
+  vdup.u32 r_shift, r14              @ r_shift = { shift, shift*, shift, shift* }
+                                     @ * - vshl.u64: ignored by hw
    vadd.u32 uvrg_base, uvrgb_phase
    vabs.s32 ga_uvrg_x, ga_uvrg_x      @ ga_uvrg_x = abs(ga_uvrg_x)
  
    vadd.u32 uvrg_base, uvrgb_phase
    vabs.s32 ga_uvrg_x, ga_uvrg_x      @ ga_uvrg_x = abs(ga_uvrg_x)
  
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c b/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c

index 5c05b14..bbeccb7 100644 (file)
--- a/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c
@@ -313,7 +313,7 @@ typedef union
  #include <stdio.h>
  #include <stdlib.h>
  #include <unistd.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <unistd.h>
-static int ccount;
+static int ccount, dump_enabled;
  void cmpp(const char *name, const void *a_, const void *b_, size_t len)
  {
    const uint32_t *a = a_, *b = b_, masks[] = { 0, 0xff, 0xffff, 0xffffff };
  void cmpp(const char *name, const void *a_, const void *b_, size_t len)
  {
    const uint32_t *a = a_, *b = b_, masks[] = { 0, 0xff, 0xffff, 0xffffff };
@@ -336,8 +336,9 @@ void cmpp(const char *name, const void *a_, const void *b_, size_t len)
  void dump_r_(const char *name, void *dump, int is_q)
  {
    unsigned long long *u = dump;
  void dump_r_(const char *name, void *dump, int is_q)
  {
    unsigned long long *u = dump;
+  if (!dump_enabled) return;
    //if (ccount > 1) return;
    //if (ccount > 1) return;
-  printf("%10s %016llx ", name, u[0]);
+  printf("%20s %016llx ", name, u[0]);
    if (is_q)
      printf("%016llx", u[1]);
    puts("");
    if (is_q)
      printf("%016llx", u[1]);
    puts("");
@@ -497,7 +498,7 @@ void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu,
  
    gvreg uvrg_base;
    gvshll_n_u16(uvrg_base, gvlo(uvrg_xxxx0), 16); // uvrg_base = uvrg0 << 16
  
    gvreg uvrg_base;
    gvshll_n_u16(uvrg_base, gvlo(uvrg_xxxx0), 16); // uvrg_base = uvrg0 << 16
-  gvdupq_n_u32(r_shift, shift);         // r_shift = { shift, shift, shift, shift }
+  gvdupq_n_s64(r_shift, shift);         // r_shift = { shift, shift }
  
    gvaddq_u32(uvrg_base, uvrg_base, uvrgb_phase);
    gvabsq_s32(ga_uvrg_x, ga_uvrg_x);     // ga_uvrg_x = abs(ga_uvrg_x)
  
    gvaddq_u32(uvrg_base, uvrg_base, uvrgb_phase);
    gvabsq_s32(ga_uvrg_x, ga_uvrg_x);     // ga_uvrg_x = abs(ga_uvrg_x)
@@ -600,7 +601,7 @@ void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu,
    vec_2x64s alternate_x;                                                       \
    vec_2x64s alternate_dx_dy;                                                   \
    vec_4x32s alternate_x_32;                                                    \
    vec_2x64s alternate_x;                                                       \
    vec_2x64s alternate_dx_dy;                                                   \
    vec_4x32s alternate_x_32;                                                    \
-  vec_2x32s alternate_x_16;                                                    \
+  vec_4x16u alternate_x_16;                                                    \
                                                                                 \
    vec_4x16u alternate_select;                                                  \
    vec_4x16s y_mid_point;                                                       \
                                                                                 \
    vec_4x16u alternate_select;                                                  \
    vec_4x16s y_mid_point;                                                       \
diff --git a/plugins/gpu_neon/psx_gpu/vector_ops.h b/plugins/gpu_neon/psx_gpu/vector_ops.h

index 189eb79..6f2bcbf 100644 (file)
--- a/plugins/gpu_neon/psx_gpu/vector_ops.h
+++ b/plugins/gpu_neon/psx_gpu/vector_ops.h
@@ -103,7 +103,7 @@
    foreach_element(2, (dest).e[_i] = (u32)(source).e[_i] >> (shift))            \
  
  #define shr_4x16b(dest, source, shift)                                         \
    foreach_element(2, (dest).e[_i] = (u32)(source).e[_i] >> (shift))            \
  
  #define shr_4x16b(dest, source, shift)                                         \
-  foreach_element(4, (dest).e[_i] = (source).e[_i] >> (shift))                 \
+  foreach_element(4, (dest).e[_i] = (u16)(source).e[_i] >> (shift))            \
  
  #define shl_4x16b(dest, source, shift)                                         \
    foreach_element(4, (dest).e[_i] = (u32)(source).e[_i] << (shift))            \
  
  #define shl_4x16b(dest, source, shift)                                         \
    foreach_element(4, (dest).e[_i] = (u32)(source).e[_i] << (shift))            \
author	notaz <notasas@gmail.com>
	Tue, 16 Aug 2022 21:11:39 +0000 (00:11 +0300)
committer	notaz <notasas@gmail.com>
	Thu, 15 Sep 2022 17:19:06 +0000 (20:19 +0300)
plugins/gpu_neon/psx_gpu/psx_gpu.c		patch | blob | blame | history
plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S		patch | blob | blame | history
plugins/gpu_neon/psx_gpu/psx_gpu_simd.c		patch | blob | blame | history
plugins/gpu_neon/psx_gpu/vector_ops.h		patch | blob | blame | history