notaz.gp2x.de
/
pcsx_rearmed.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Merge pull request #749 from pcercuei/lightrec-allow-mem-override
[pcsx_rearmed.git]
/
plugins
/
gpu_neon
/
psx_gpu
/
psx_gpu.c
diff --git
a/plugins/gpu_neon/psx_gpu/psx_gpu.c
b/plugins/gpu_neon/psx_gpu/psx_gpu.c
index
a79254d
..
85cf89f
100644
(file)
--- a/
plugins/gpu_neon/psx_gpu/psx_gpu.c
+++ b/
plugins/gpu_neon/psx_gpu/psx_gpu.c
@@
-14,6
+14,7
@@
#include <stdio.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdlib.h>
+#include <stdint.h>
#include <string.h>
#include "common.h"
#include <string.h>
#include "common.h"
@@
-22,6
+23,13
@@
#endif
#include "psx_gpu_simd.h"
#endif
#include "psx_gpu_simd.h"
+#if 0
+void dump_r_d(const char *name, void *dump);
+void dump_r_q(const char *name, void *dump);
+#define dumprd(n) dump_r_d(#n, n.e)
+#define dumprq(n) dump_r_q(#n, n.e)
+#endif
+
u32 span_pixels = 0;
u32 span_pixel_blocks = 0;
u32 spans = 0;
u32 span_pixels = 0;
u32 span_pixel_blocks = 0;
u32 spans = 0;
@@
-51,6
+59,8
@@
u32 zero_block_spans = 0;
u32 texture_cache_loads = 0;
u32 false_modulated_blocks = 0;
u32 texture_cache_loads = 0;
u32 false_modulated_blocks = 0;
+#define stats_add(stat, count) // stat += count
+
/* double size for enhancement */
u32 reciprocal_table[512 * 2];
/* double size for enhancement */
u32 reciprocal_table[512 * 2];
@@
-767,13
+777,13
@@
void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a,
{ \
u32 _num_spans = &span_edge_data_element - psx_gpu->span_edge_data; \
if (_num_spans > MAX_SPANS) \
{ \
u32 _num_spans = &span_edge_data_element - psx_gpu->span_edge_data; \
if (_num_spans > MAX_SPANS) \
- *(
int *)0 = 1;
\
+ *(
volatile int *)0 = 1;
\
if (_num_spans < psx_gpu->num_spans) \
{ \
if(span_edge_data_element.num_blocks > MAX_BLOCKS_PER_ROW) \
if (_num_spans < psx_gpu->num_spans) \
{ \
if(span_edge_data_element.num_blocks > MAX_BLOCKS_PER_ROW) \
- *(
int *)0 = 1;
\
- if(span_edge_data_element.y >
2048)
\
- *(
int *)0 = 1;
\
+ *(
volatile int *)0 = 2;
\
+ if(span_edge_data_element.y >
= 2048)
\
+ *(
volatile int *)0 = 3;
\
} \
} \
} \
} \
@@
-786,7
+796,7
@@
void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a,
vec_2x64s alternate_x; \
vec_2x64s alternate_dx_dy; \
vec_4x32s alternate_x_32; \
vec_2x64s alternate_x; \
vec_2x64s alternate_dx_dy; \
vec_4x32s alternate_x_32; \
- vec_
2x32s
alternate_x_16; \
+ vec_
4x16u
alternate_x_16; \
\
vec_4x16u alternate_select; \
vec_4x16s y_mid_point; \
\
vec_4x16u alternate_select; \
vec_4x16s y_mid_point; \
@@
-1842,7
+1852,7
@@
void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
} \
#define setup_blocks_add_blocks_direct() \
} \
#define setup_blocks_add_blocks_direct() \
-
texel_blocks_untextured += span_num_blocks;
\
+
stats_add(texel_blocks_untextured, span_num_blocks);
\
span_pixel_blocks += span_num_blocks \
span_pixel_blocks += span_num_blocks \
@@
-1938,14
+1948,14
@@
setup_blocks_builder(unshaded, untextured, undithered, unswizzled, direct);
void texture_blocks_untextured(psx_gpu_struct *psx_gpu)
{
if(psx_gpu->primitive_type != PRIMITIVE_TYPE_SPRITE)
void texture_blocks_untextured(psx_gpu_struct *psx_gpu)
{
if(psx_gpu->primitive_type != PRIMITIVE_TYPE_SPRITE)
-
texel_blocks_untextured += psx_gpu->num_blocks
;
+
stats_add(texel_blocks_untextured, psx_gpu->num_blocks)
;
}
void texture_blocks_4bpp(psx_gpu_struct *psx_gpu)
{
block_struct *block = psx_gpu->blocks;
u32 num_blocks = psx_gpu->num_blocks;
}
void texture_blocks_4bpp(psx_gpu_struct *psx_gpu)
{
block_struct *block = psx_gpu->blocks;
u32 num_blocks = psx_gpu->num_blocks;
-
texel_blocks_4bpp += num_blocks
;
+
stats_add(texel_blocks_4bpp, num_blocks)
;
vec_8x8u texels_low;
vec_8x8u texels_high;
vec_8x8u texels_low;
vec_8x8u texels_high;
@@
-1997,7
+2007,7
@@
void texture_blocks_8bpp(psx_gpu_struct *psx_gpu)
block_struct *block = psx_gpu->blocks;
u32 num_blocks = psx_gpu->num_blocks;
block_struct *block = psx_gpu->blocks;
u32 num_blocks = psx_gpu->num_blocks;
-
texel_blocks_8bpp += num_blocks
;
+
stats_add(texel_blocks_8bpp, num_blocks)
;
if(psx_gpu->current_texture_mask & psx_gpu->dirty_textures_8bpp_mask)
update_texture_8bpp_cache(psx_gpu);
if(psx_gpu->current_texture_mask & psx_gpu->dirty_textures_8bpp_mask)
update_texture_8bpp_cache(psx_gpu);
@@
-2031,7
+2041,7
@@
void texture_blocks_16bpp(psx_gpu_struct *psx_gpu)
block_struct *block = psx_gpu->blocks;
u32 num_blocks = psx_gpu->num_blocks;
block_struct *block = psx_gpu->blocks;
u32 num_blocks = psx_gpu->num_blocks;
-
texel_blocks_16bpp += num_blocks
;
+
stats_add(texel_blocks_16bpp, num_blocks)
;
vec_8x16u texels;
vec_8x16u texels;
@@
-3067,7
+3077,7
@@
void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
render_triangle_p(psx_gpu, vertex_ptrs, flags);
}
render_triangle_p(psx_gpu, vertex_ptrs, flags);
}
-#if
ndef NEON_BUILD
+#if
!defined(NEON_BUILD) || defined(SIMD_BUILD)
void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu)
{
void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu)
{
@@
-3120,7
+3130,7
@@
void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu)
#define setup_sprite_tile_fetch_texel_block_8bpp(offset) \
#define setup_sprite_tile_fetch_texel_block_8bpp(offset) \
- texture_block_ptr =
psx_gpu->texture_page_ptr +
\
+ texture_block_ptr =
(u8 *)psx_gpu->texture_page_ptr +
\
((texture_offset + offset) & texture_mask); \
\
load_64b(texels, texture_block_ptr) \
((texture_offset + offset) & texture_mask); \
\
load_64b(texels, texture_block_ptr) \
@@
-3228,7
+3238,7
@@
void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu)
#define setup_sprite_tile_half_8bpp(edge) \
{ \
#define setup_sprite_tile_half_8bpp(edge) \
{ \
- setup_sprite_tile_add_blocks(sub_tile_height
* 2);
\
+ setup_sprite_tile_add_blocks(sub_tile_height
);
\
\
while(sub_tile_height) \
{ \
\
while(sub_tile_height) \
{ \
@@
-3682,7
+3692,7
@@
void setup_sprite_##texture_mode##x4mode(psx_gpu_struct *psx_gpu, s32 x, s32 y,\
u32 num_blocks = psx_gpu->num_blocks; \
block_struct *block = psx_gpu->blocks + num_blocks; \
\
u32 num_blocks = psx_gpu->num_blocks; \
block_struct *block = psx_gpu->blocks + num_blocks; \
\
- u
16 *texture_block_ptr;
\
+ u
8 *texture_block_ptr;
\
vec_8x8u texels; \
\
setup_sprite_tiled_initialize_##texture_mode##x4mode(); \
vec_8x8u texels; \
\
setup_sprite_tiled_initialize_##texture_mode##x4mode(); \
@@
-3775,6
+3785,9
@@
setup_sprite_tiled_builder(8bpp,);
setup_sprite_tiled_builder(4bpp,_4x);
setup_sprite_tiled_builder(8bpp,_4x);
setup_sprite_tiled_builder(4bpp,_4x);
setup_sprite_tiled_builder(8bpp,_4x);
+#endif
+
+#if !defined(NEON_BUILD) || defined(SIMD_BUILD)
void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
s32 v, s32 width, s32 height, u32 color)
void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
s32 v, s32 width, s32 height, u32 color)
@@
-3803,7
+3816,7
@@
void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
texture_offset_base &= ~0x7;
texture_offset_base &= ~0x7;
- s
prites_16bpp++
;
+ s
tats_add(sprites_16bpp, 1)
;
if(block_width == 1)
{
if(block_width == 1)
{
@@
-3824,7
+3837,7
@@
void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
texture_block_ptr =
texture_page_ptr + (texture_offset_base & texture_mask);
texture_block_ptr =
texture_page_ptr + (texture_offset_base & texture_mask);
-
load_128b(block->texels, texture_block_ptr)
;
+
block->texels = *(vec_8x16u *)texture_block_ptr
;
block->draw_mask_bits = mask_bits;
block->fb_ptr = fb_ptr;
block->draw_mask_bits = mask_bits;
block->fb_ptr = fb_ptr;
@@
-3858,7
+3871,7
@@
void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
texture_offset_base += 1024;
texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask);
texture_offset_base += 1024;
texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask);
-
load_128b(block->texels, texture_block_ptr)
;
+
block->texels = *(vec_8x16u *)texture_block_ptr
;
block->draw_mask_bits = left_mask_bits;
block->fb_ptr = fb_ptr;
block->draw_mask_bits = left_mask_bits;
block->fb_ptr = fb_ptr;
@@
-3870,7
+3883,7
@@
void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
while(blocks_remaining)
{
texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask);
while(blocks_remaining)
{
texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask);
-
load_128b(block->texels, texture_block_ptr)
;
+
block->texels = *(vec_8x16u *)texture_block_ptr
;
block->draw_mask_bits = 0;
block->fb_ptr = fb_ptr;
block->draw_mask_bits = 0;
block->fb_ptr = fb_ptr;
@@
-3883,7
+3896,7
@@
void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
}
texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask);
}
texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask);
-
load_128b(block->texels, texture_block_ptr)
;
+
block->texels = *(vec_8x16u *)texture_block_ptr
;
block->draw_mask_bits = right_mask_bits;
block->fb_ptr = fb_ptr;
block->draw_mask_bits = right_mask_bits;
block->fb_ptr = fb_ptr;
@@
-3897,6
+3910,10
@@
void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
}
}
}
}
+#endif
+
+#ifndef NEON_BUILD
+
void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
s32 v, s32 width, s32 height, u32 color)
{
void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
s32 v, s32 width, s32 height, u32 color)
{
@@
-4000,7
+4017,7
@@
void setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y,
num_width = width;
vram_ptr = (void *)vram_ptr16;
num_width = width;
vram_ptr = (void *)vram_ptr16;
- if((
long
)vram_ptr16 & 2)
+ if((
uintptr_t
)vram_ptr16 & 2)
{
*vram_ptr16 = color_32bpp;
vram_ptr = (void *)(vram_ptr16 + 1);
{
*vram_ptr16 = color_32bpp;
vram_ptr = (void *)(vram_ptr16 + 1);