notaz.gp2x.de
/
pcsx_rearmed.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (from parent 1:
4d384ca
)
gpu_neon: adjust some comments and things
author
notaz
<notasas@gmail.com>
Tue, 16 Aug 2022 21:11:39 +0000
(
00:11
+0300)
committer
notaz
<notasas@gmail.com>
Thu, 15 Sep 2022 17:19:06 +0000
(20:19 +0300)
plugins/gpu_neon/psx_gpu/psx_gpu.c
patch
|
blob
|
blame
|
history
plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
patch
|
blob
|
blame
|
history
plugins/gpu_neon/psx_gpu/psx_gpu_simd.c
patch
|
blob
|
blame
|
history
plugins/gpu_neon/psx_gpu/vector_ops.h
patch
|
blob
|
blame
|
history
diff --git
a/plugins/gpu_neon/psx_gpu/psx_gpu.c
b/plugins/gpu_neon/psx_gpu/psx_gpu.c
index
1d513d8
..
51ad152
100644
(file)
--- a/
plugins/gpu_neon/psx_gpu/psx_gpu.c
+++ b/
plugins/gpu_neon/psx_gpu/psx_gpu.c
@@
-22,6
+22,13
@@
#endif
#include "psx_gpu_simd.h"
#endif
#include "psx_gpu_simd.h"
+#if 0
+void dump_r_d(const char *name, void *dump);
+void dump_r_q(const char *name, void *dump);
+#define dumprd(n) dump_r_d(#n, n.e)
+#define dumprq(n) dump_r_q(#n, n.e)
+#endif
+
u32 span_pixels = 0;
u32 span_pixel_blocks = 0;
u32 spans = 0;
u32 span_pixels = 0;
u32 span_pixel_blocks = 0;
u32 spans = 0;
@@
-769,13
+776,13
@@
void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a,
{ \
u32 _num_spans = &span_edge_data_element - psx_gpu->span_edge_data; \
if (_num_spans > MAX_SPANS) \
{ \
u32 _num_spans = &span_edge_data_element - psx_gpu->span_edge_data; \
if (_num_spans > MAX_SPANS) \
- *(int *)0 = 1; \
+ *(volatile int *)0 = 1; \
if (_num_spans < psx_gpu->num_spans) \
{ \
if(span_edge_data_element.num_blocks > MAX_BLOCKS_PER_ROW) \
if (_num_spans < psx_gpu->num_spans) \
{ \
if(span_edge_data_element.num_blocks > MAX_BLOCKS_PER_ROW) \
- *(int *)0 = 1; \
- if(span_edge_data_element.y > 2048) \
- *(int *)0 = 1; \
+ *(volatile int *)0 = 2; \
+ if(span_edge_data_element.y >= 2048) \
+ *(volatile int *)0 = 3; \
} \
} \
} \
} \
@@
-788,7
+795,7
@@
void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a,
vec_2x64s alternate_x; \
vec_2x64s alternate_dx_dy; \
vec_4x32s alternate_x_32; \
vec_2x64s alternate_x; \
vec_2x64s alternate_dx_dy; \
vec_4x32s alternate_x_32; \
- vec_2x32s alternate_x_16; \
+ vec_4x16u alternate_x_16; \
\
vec_4x16u alternate_select; \
vec_4x16s y_mid_point; \
\
vec_4x16u alternate_select; \
vec_4x16s y_mid_point; \
diff --git
a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
index
da47756
..
c62c1ba
100644
(file)
--- a/
plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
+++ b/
plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
@@
-369,8
+369,8
@@
function(compute_all_gradients)
sub r14, r14, #(62 - 12) @ r14 = shift - (62 - FIXED_BITS)
vshll.u16 uvrg_base, uvrg0, #16 @ uvrg_base = uvrg0 << 16
sub r14, r14, #(62 - 12) @ r14 = shift - (62 - FIXED_BITS)
vshll.u16 uvrg_base, uvrg0, #16 @ uvrg_base = uvrg0 << 16
- vdup.u32 r_shift, r14 @ r_shift = { shift, shift, shift, shift }
-
+ vdup.u32 r_shift, r14 @ r_shift = { shift, shift*, shift, shift* }
+ @ * - vshl.u64: ignored by hw
vadd.u32 uvrg_base, uvrgb_phase
vabs.s32 ga_uvrg_x, ga_uvrg_x @ ga_uvrg_x = abs(ga_uvrg_x)
vadd.u32 uvrg_base, uvrgb_phase
vabs.s32 ga_uvrg_x, ga_uvrg_x @ ga_uvrg_x = abs(ga_uvrg_x)
diff --git
a/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c
b/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c
index
5c05b14
..
bbeccb7
100644
(file)
--- a/
plugins/gpu_neon/psx_gpu/psx_gpu_simd.c
+++ b/
plugins/gpu_neon/psx_gpu/psx_gpu_simd.c
@@
-313,7
+313,7
@@
typedef union
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
-static int ccount;
+static int ccount, dump_enabled;
void cmpp(const char *name, const void *a_, const void *b_, size_t len)
{
const uint32_t *a = a_, *b = b_, masks[] = { 0, 0xff, 0xffff, 0xffffff };
void cmpp(const char *name, const void *a_, const void *b_, size_t len)
{
const uint32_t *a = a_, *b = b_, masks[] = { 0, 0xff, 0xffff, 0xffffff };
@@
-336,8
+336,9
@@
void cmpp(const char *name, const void *a_, const void *b_, size_t len)
void dump_r_(const char *name, void *dump, int is_q)
{
unsigned long long *u = dump;
void dump_r_(const char *name, void *dump, int is_q)
{
unsigned long long *u = dump;
+ if (!dump_enabled) return;
//if (ccount > 1) return;
//if (ccount > 1) return;
- printf("%10s %016llx ", name, u[0]);
+ printf("%20s %016llx ", name, u[0]);
if (is_q)
printf("%016llx", u[1]);
puts("");
if (is_q)
printf("%016llx", u[1]);
puts("");
@@
-497,7
+498,7
@@
void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu,
gvreg uvrg_base;
gvshll_n_u16(uvrg_base, gvlo(uvrg_xxxx0), 16); // uvrg_base = uvrg0 << 16
gvreg uvrg_base;
gvshll_n_u16(uvrg_base, gvlo(uvrg_xxxx0), 16); // uvrg_base = uvrg0 << 16
- gvdupq_n_u32(r_shift, shift); // r_shift = { shift, shift, shift, shift }
+ gvdupq_n_s64(r_shift, shift); // r_shift = { shift, shift }
gvaddq_u32(uvrg_base, uvrg_base, uvrgb_phase);
gvabsq_s32(ga_uvrg_x, ga_uvrg_x); // ga_uvrg_x = abs(ga_uvrg_x)
gvaddq_u32(uvrg_base, uvrg_base, uvrgb_phase);
gvabsq_s32(ga_uvrg_x, ga_uvrg_x); // ga_uvrg_x = abs(ga_uvrg_x)
@@
-600,7
+601,7
@@
void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu,
vec_2x64s alternate_x; \
vec_2x64s alternate_dx_dy; \
vec_4x32s alternate_x_32; \
vec_2x64s alternate_x; \
vec_2x64s alternate_dx_dy; \
vec_4x32s alternate_x_32; \
- vec_2x32s alternate_x_16; \
+ vec_4x16u alternate_x_16; \
\
vec_4x16u alternate_select; \
vec_4x16s y_mid_point; \
\
vec_4x16u alternate_select; \
vec_4x16s y_mid_point; \
diff --git
a/plugins/gpu_neon/psx_gpu/vector_ops.h
b/plugins/gpu_neon/psx_gpu/vector_ops.h
index
189eb79
..
6f2bcbf
100644
(file)
--- a/
plugins/gpu_neon/psx_gpu/vector_ops.h
+++ b/
plugins/gpu_neon/psx_gpu/vector_ops.h
@@
-103,7
+103,7
@@
foreach_element(2, (dest).e[_i] = (u32)(source).e[_i] >> (shift)) \
#define shr_4x16b(dest, source, shift) \
foreach_element(2, (dest).e[_i] = (u32)(source).e[_i] >> (shift)) \
#define shr_4x16b(dest, source, shift) \
- foreach_element(4, (dest).e[_i] = (source).e[_i] >> (shift)) \
+ foreach_element(4, (dest).e[_i] = (u16)(source).e[_i] >> (shift)) \
#define shl_4x16b(dest, source, shift) \
foreach_element(4, (dest).e[_i] = (u32)(source).e[_i] << (shift)) \
#define shl_4x16b(dest, source, shift) \
foreach_element(4, (dest).e[_i] = (u32)(source).e[_i] << (shift)) \