#include <stdio.h>
#include <stdlib.h>
+#include <stdint.h>
#include <string.h>
#include "common.h"
#endif
#include "psx_gpu_simd.h"
/*
 * Debug helpers, compiled out by the "#if 0" guard below.
 * dumprd(n) / dumprq(n) forward the stringified argument (#n) together with
 * its ".e" member to dump_r_d() / dump_r_q(). Those two functions are only
 * declared here; their definitions are not part of this section.
 */
+#if 0
+void dump_r_d(const char *name, void *dump);
+void dump_r_q(const char *name, void *dump);
+#define dumprd(n) dump_r_d(#n, n.e)
+#define dumprq(n) dump_r_q(#n, n.e)
+#endif
+
/*
 * File-scope u32 tallies, all starting at zero and externally visible (not
 * static). span_pixel_blocks is bumped by span_num_blocks in
 * setup_blocks_add_blocks_direct().
 * NOTE(review): judging by the names these are rendering statistics; the
 * code updating the other counters is not visible here -- confirm.
 */
u32 span_pixels = 0;
u32 span_pixel_blocks = 0;
u32 spans = 0;
u32 texture_cache_loads = 0;
u32 false_modulated_blocks = 0;
/*
 * Statistics hook, currently compiled out: the expansion is empty, so neither
 * `stat` nor `count` is evaluated. The disabled form is `stat += count`.
 * NOTE(review): with an empty expansion, `if(cond) stats_add(a, b);` leaves an
 * empty if body, which some compilers warn about (-Wempty-body); expanding to
 * ((void)0) would avoid that.
 */
+#define stats_add(stat, count) // stat += count
+
/*
 * Table of 512 * 2 u32 entries, i.e. twice a 512-entry table ("double size
 * for enhancement").
 * NOTE(review): presumably a reciprocal lookup table whose extra half serves
 * the enhanced-resolution rendering path -- confirm against the code that
 * fills and indexes it.
 */
u32 reciprocal_table[512 * 2];
{ \
u32 _num_spans = &span_edge_data_element - psx_gpu->span_edge_data; \
if (_num_spans > MAX_SPANS) \
- *(int *)0 = 1; \
+ *(volatile int *)0 = 1; \
if (_num_spans < psx_gpu->num_spans) \
{ \
if(span_edge_data_element.num_blocks > MAX_BLOCKS_PER_ROW) \
- *(int *)0 = 1; \
- if(span_edge_data_element.y > 2048) \
- *(int *)0 = 1; \
+ *(volatile int *)0 = 2; \
+ if(span_edge_data_element.y >= 2048) \
+ *(volatile int *)0 = 3; \
} \
} \
vec_2x64s alternate_x; \
vec_2x64s alternate_dx_dy; \
vec_4x32s alternate_x_32; \
- vec_2x32s alternate_x_16; \
+ vec_4x16u alternate_x_16; \
\
vec_4x16u alternate_select; \
vec_4x16s y_mid_point; \
} \
#define setup_blocks_add_blocks_direct() \
- texel_blocks_untextured += span_num_blocks; \
+ stats_add(texel_blocks_untextured, span_num_blocks); \
span_pixel_blocks += span_num_blocks \
/*
 * Accounting-only step for untextured blocks: for any primitive type other
 * than PRIMITIVE_TYPE_SPRITE, psx_gpu->num_blocks is reported against the
 * texel_blocks_untextured statistic; sprites are skipped. The function does
 * nothing else.
 *
 * The removed line added to the counter directly; the replacement goes
 * through stats_add(), which as defined earlier in this file expands to
 * nothing, leaving the if statement with an empty body.
 */
void texture_blocks_untextured(psx_gpu_struct *psx_gpu)
{
if(psx_gpu->primitive_type != PRIMITIVE_TYPE_SPRITE)
- texel_blocks_untextured += psx_gpu->num_blocks;
+ stats_add(texel_blocks_untextured, psx_gpu->num_blocks);
}
void texture_blocks_4bpp(psx_gpu_struct *psx_gpu)
{
block_struct *block = psx_gpu->blocks;
u32 num_blocks = psx_gpu->num_blocks;
- texel_blocks_4bpp += num_blocks;
+ stats_add(texel_blocks_4bpp, num_blocks);
vec_8x8u texels_low;
vec_8x8u texels_high;
block_struct *block = psx_gpu->blocks;
u32 num_blocks = psx_gpu->num_blocks;
- texel_blocks_8bpp += num_blocks;
+ stats_add(texel_blocks_8bpp, num_blocks);
if(psx_gpu->current_texture_mask & psx_gpu->dirty_textures_8bpp_mask)
update_texture_8bpp_cache(psx_gpu);
block_struct *block = psx_gpu->blocks;
u32 num_blocks = psx_gpu->num_blocks;
- texel_blocks_16bpp += num_blocks;
+ stats_add(texel_blocks_16bpp, num_blocks);
vec_8x16u texels;
render_triangle_p(psx_gpu, vertex_ptrs, flags);
}
-#ifndef NEON_BUILD
+#if !defined(NEON_BUILD) || defined(SIMD_BUILD)
void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu)
{
#define setup_sprite_tile_fetch_texel_block_8bpp(offset) \
- texture_block_ptr = psx_gpu->texture_page_ptr + \
+ texture_block_ptr = (u8 *)psx_gpu->texture_page_ptr + \
((texture_offset + offset) & texture_mask); \
\
load_64b(texels, texture_block_ptr) \
#define setup_sprite_tile_half_8bpp(edge) \
{ \
- setup_sprite_tile_add_blocks(sub_tile_height * 2); \
+ setup_sprite_tile_add_blocks(sub_tile_height); \
\
while(sub_tile_height) \
{ \
u32 num_blocks = psx_gpu->num_blocks; \
block_struct *block = psx_gpu->blocks + num_blocks; \
\
- u16 *texture_block_ptr; \
+ u8 *texture_block_ptr; \
vec_8x8u texels; \
\
setup_sprite_tiled_initialize_##texture_mode##x4mode(); \
setup_sprite_tiled_builder(4bpp,_4x);
setup_sprite_tiled_builder(8bpp,_4x);
+#endif
+
+#if !defined(NEON_BUILD) || defined(SIMD_BUILD)
void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
s32 v, s32 width, s32 height, u32 color)
texture_offset_base &= ~0x7;
- sprites_16bpp++;
+ stats_add(sprites_16bpp, 1);
if(block_width == 1)
{
texture_block_ptr =
texture_page_ptr + (texture_offset_base & texture_mask);
- load_128b(block->texels, texture_block_ptr);
+ block->texels = *(vec_8x16u *)texture_block_ptr;
block->draw_mask_bits = mask_bits;
block->fb_ptr = fb_ptr;
texture_offset_base += 1024;
texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask);
- load_128b(block->texels, texture_block_ptr);
+ block->texels = *(vec_8x16u *)texture_block_ptr;
block->draw_mask_bits = left_mask_bits;
block->fb_ptr = fb_ptr;
while(blocks_remaining)
{
texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask);
- load_128b(block->texels, texture_block_ptr);
+ block->texels = *(vec_8x16u *)texture_block_ptr;
block->draw_mask_bits = 0;
block->fb_ptr = fb_ptr;
}
texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask);
- load_128b(block->texels, texture_block_ptr);
+ block->texels = *(vec_8x16u *)texture_block_ptr;
block->draw_mask_bits = right_mask_bits;
block->fb_ptr = fb_ptr;
}
}
+#endif
+
+#ifndef NEON_BUILD
+
void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
s32 v, s32 width, s32 height, u32 color)
{
num_width = width;
vram_ptr = (void *)vram_ptr16;
- if((long)vram_ptr16 & 2)
+ if((uintptr_t)vram_ptr16 & 2)
{
*vram_ptr16 = color_32bpp;
vram_ptr = (void *)(vram_ptr16 + 1);