#include <string.h>
#include "common.h"
+#ifndef NEON_BUILD
+#include "vector_ops.h"
+#endif
+#include "psx_gpu_simd.h"
u32 span_pixels = 0;
u32 span_pixel_blocks = 0;
u32 texture_cache_loads = 0;
u32 false_modulated_blocks = 0;
+#define stats_add(stat, count) // statistics disabled: expands to nothing (to enable, define as: stat += count)
+
/* double size for enhancement */
u32 reciprocal_table[512 * 2];
}
}
-void update_texture_8bpp_cache_slice(psx_gpu_struct *psx_gpu,
- u32 texture_page);
-
#ifndef NEON_BUILD
void update_texture_4bpp_cache(psx_gpu_struct *psx_gpu)
}
}
-void setup_blocks_shaded_untextured_undithered_unswizzled_indirect(
- psx_gpu_struct *psx_gpu);
-
void flush_render_block_buffer(psx_gpu_struct *psx_gpu)
{
if((psx_gpu->render_mode & RENDER_INTERLACE_ENABLED) &&
}
-void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a,
- vertex_struct *b, vertex_struct *c);
-
#ifndef NEON_BUILD
#define setup_gradient_calculation_input(set, vertex) \
setup_spans_up(index_##major, index_##minor, minor, yes) \
-void setup_spans_up_left(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
- vertex_struct *v_b, vertex_struct *v_c);
-void setup_spans_up_right(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
- vertex_struct *v_b, vertex_struct *v_c);
-void setup_spans_down_left(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
- vertex_struct *v_b, vertex_struct *v_c);
-void setup_spans_down_right(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
- vertex_struct *v_b, vertex_struct *v_c);
-void setup_spans_up_a(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
- vertex_struct *v_b, vertex_struct *v_c);
-void setup_spans_up_b(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
- vertex_struct *v_b, vertex_struct *v_c);
-void setup_spans_down_a(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
- vertex_struct *v_b, vertex_struct *v_c);
-void setup_spans_down_b(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
- vertex_struct *v_b, vertex_struct *v_c);
-void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
- vertex_struct *v_b, vertex_struct *v_c);
-
-
#ifndef NEON_BUILD
void setup_spans_up_left(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
} \
#define setup_blocks_add_blocks_direct() \
- texel_blocks_untextured += span_num_blocks; \
+ stats_add(texel_blocks_untextured, span_num_blocks); \
span_pixel_blocks += span_num_blocks \
psx_gpu->num_blocks = num_blocks; \
} \
-void setup_blocks_shaded_textured_dithered_unswizzled_indirect(psx_gpu_struct
- *psx_gpu);
-
-void setup_blocks_shaded_untextured_dithered_unswizzled_indirect(psx_gpu_struct
- *psx_gpu);
-void setup_blocks_shaded_untextured_undithered_unswizzled_indirect(
- psx_gpu_struct *psx_gpu);
-void setup_blocks_shaded_untextured_dithered_unswizzled_direct(psx_gpu_struct
- *psx_gpu);
-void setup_blocks_shaded_untextured_undithered_unswizzled_direct(
- psx_gpu_struct *psx_gpu);
-
-void setup_blocks_unshaded_textured_dithered_unswizzled_indirect(psx_gpu_struct
- *psx_gpu);
-void setup_blocks_unshaded_untextured_undithered_unswizzled_indirect(
- psx_gpu_struct *psx_gpu);
-void setup_blocks_unshaded_untextured_undithered_unswizzled_direct(
- psx_gpu_struct *psx_gpu);
-
-void setup_blocks_shaded_textured_dithered_swizzled_indirect(psx_gpu_struct
- *psx_gpu);
-void setup_blocks_unshaded_textured_dithered_swizzled_indirect(psx_gpu_struct
- *psx_gpu);
-
//setup_blocks_builder(unshaded, untextured, undithered, unswizzled, direct);
setup_blocks_builder(unshaded, untextured, undithered, unswizzled, indirect);
setup_blocks_builder(unshaded, untextured, undithered, unswizzled, direct);
-#endif
-
-void texture_blocks_untextured(psx_gpu_struct *psx_gpu);
-void texture_blocks_4bpp(psx_gpu_struct *psx_gpu);
-void texture_blocks_8bpp(psx_gpu_struct *psx_gpu);
-void texture_blocks_16bpp(psx_gpu_struct *psx_gpu);
-
-#ifndef NEON_BUILD
-
void texture_blocks_untextured(psx_gpu_struct *psx_gpu)
{
if(psx_gpu->primitive_type != PRIMITIVE_TYPE_SPRITE)
- texel_blocks_untextured += psx_gpu->num_blocks;
+ stats_add(texel_blocks_untextured, psx_gpu->num_blocks); // statistics only; a no-op while stats_add() expands to nothing
}
void texture_blocks_4bpp(psx_gpu_struct *psx_gpu)
{
block_struct *block = psx_gpu->blocks;
u32 num_blocks = psx_gpu->num_blocks;
- texel_blocks_4bpp += num_blocks;
+ stats_add(texel_blocks_4bpp, num_blocks);
vec_8x8u texels_low;
vec_8x8u texels_high;
block_struct *block = psx_gpu->blocks;
u32 num_blocks = psx_gpu->num_blocks;
- texel_blocks_8bpp += num_blocks;
+ stats_add(texel_blocks_8bpp, num_blocks);
if(psx_gpu->current_texture_mask & psx_gpu->dirty_textures_8bpp_mask)
update_texture_8bpp_cache(psx_gpu);
block_struct *block = psx_gpu->blocks;
u32 num_blocks = psx_gpu->num_blocks;
- texel_blocks_16bpp += num_blocks;
+ stats_add(texel_blocks_16bpp, num_blocks);
vec_8x16u texels;
} \
} \
-void shade_blocks_shaded_textured_modulated_dithered_direct(psx_gpu_struct
- *psx_gpu);
-void shade_blocks_shaded_textured_modulated_undithered_direct(psx_gpu_struct
- *psx_gpu);
-void shade_blocks_unshaded_textured_modulated_dithered_direct(psx_gpu_struct
- *psx_gpu);
-void shade_blocks_unshaded_textured_modulated_undithered_direct(psx_gpu_struct
- *psx_gpu);
-
-void shade_blocks_shaded_textured_modulated_dithered_indirect(psx_gpu_struct
- *psx_gpu);
-void shade_blocks_shaded_textured_modulated_undithered_indirect(psx_gpu_struct
- *psx_gpu);
-void shade_blocks_unshaded_textured_modulated_dithered_indirect(psx_gpu_struct
- *psx_gpu);
-void shade_blocks_unshaded_textured_modulated_undithered_indirect(psx_gpu_struct
- *psx_gpu);
-
-void shade_blocks_textured_unmodulated_indirect(psx_gpu_struct *psx_gpu);
-void shade_blocks_textured_unmodulated_direct(psx_gpu_struct *psx_gpu);
-
#ifndef NEON_BUILD
shade_blocks_textured_modulated_builder(shaded, dithered, direct);
shade_blocks_textured_unmodulated_builder(indirect)
shade_blocks_textured_unmodulated_builder(direct)
-#endif
-
-
-void shade_blocks_unshaded_untextured_indirect(psx_gpu_struct *psx_gpu);
-void shade_blocks_unshaded_untextured_direct(psx_gpu_struct *psx_gpu);
-
-#ifndef NEON_BUILD
-
void shade_blocks_unshaded_untextured_indirect(psx_gpu_struct *psx_gpu)
{
}
} \
} \
-void blend_blocks_textured_average_off(psx_gpu_struct *psx_gpu);
-void blend_blocks_textured_average_on(psx_gpu_struct *psx_gpu);
-void blend_blocks_textured_add_off(psx_gpu_struct *psx_gpu);
-void blend_blocks_textured_add_on(psx_gpu_struct *psx_gpu);
-void blend_blocks_textured_subtract_off(psx_gpu_struct *psx_gpu);
-void blend_blocks_textured_subtract_on(psx_gpu_struct *psx_gpu);
-void blend_blocks_textured_add_fourth_off(psx_gpu_struct *psx_gpu);
-void blend_blocks_textured_add_fourth_on(psx_gpu_struct *psx_gpu);
-
-void blend_blocks_untextured_average_off(psx_gpu_struct *psx_gpu);
-void blend_blocks_untextured_average_on(psx_gpu_struct *psx_gpu);
-void blend_blocks_untextured_add_off(psx_gpu_struct *psx_gpu);
-void blend_blocks_untextured_add_on(psx_gpu_struct *psx_gpu);
-void blend_blocks_untextured_subtract_off(psx_gpu_struct *psx_gpu);
-void blend_blocks_untextured_subtract_on(psx_gpu_struct *psx_gpu);
-void blend_blocks_untextured_add_fourth_off(psx_gpu_struct *psx_gpu);
-void blend_blocks_untextured_add_fourth_on(psx_gpu_struct *psx_gpu);
-
-void blend_blocks_textured_unblended_off(psx_gpu_struct *psx_gpu);
-void blend_blocks_textured_unblended_on(psx_gpu_struct *psx_gpu);
-
#ifndef NEON_BUILD
void blend_blocks_textured_unblended_off(psx_gpu_struct *psx_gpu)
render_blocks_switch_block_texture_mode(4bpp), \
render_blocks_switch_block_texture_mode(8bpp), \
render_blocks_switch_block_texture_mode(16bpp), \
- render_blocks_switch_block_texture_mode(4bpp) \
+ render_blocks_switch_block_texture_mode(16bpp) \
render_block_handler_struct render_triangle_block_handlers[] =
render_triangle_p(psx_gpu, vertex_ptrs, flags);
}
-
-void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu);
-
-#ifndef NEON_BUILD
+#if !defined(NEON_BUILD) || defined(SIMD_BUILD)
void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu)
{
#define setup_sprite_tile_fetch_texel_block_8bpp(offset) \
- texture_block_ptr = psx_gpu->texture_page_ptr + \
+ texture_block_ptr = (u8 *)psx_gpu->texture_page_ptr + \
((texture_offset + offset) & texture_mask); \
\
load_64b(texels, texture_block_ptr) \
#define setup_sprite_tile_half_8bpp(edge) \
{ \
- setup_sprite_tile_add_blocks(sub_tile_height * 2); \
+ setup_sprite_tile_add_blocks(sub_tile_height); \
\
while(sub_tile_height) \
{ \
u32 num_blocks = psx_gpu->num_blocks; \
block_struct *block = psx_gpu->blocks + num_blocks; \
\
- u16 *texture_block_ptr; \
+ u8 *texture_block_ptr; \
vec_8x8u texels; \
\
setup_sprite_tiled_initialize_##texture_mode##x4mode(); \
} \
} \
-void setup_sprite_4bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
- s32 width, s32 height, u32 color);
-void setup_sprite_8bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
- s32 width, s32 height, u32 color);
-void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
- s32 width, s32 height, u32 color);
-
-void setup_sprite_4bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
- s32 width, s32 height, u32 color);
-void setup_sprite_8bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
- s32 width, s32 height, u32 color);
-void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
- s32 width, s32 height, u32 color);
-
-void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
- s32 v, s32 width, s32 height, u32 color);
-void setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y,
- s32 u, s32 v, s32 width, s32 height, u32 color);
-
#ifndef NEON_BUILD
setup_sprite_tiled_builder(4bpp,);
setup_sprite_tiled_builder(8bpp,);
setup_sprite_tiled_builder(4bpp,_4x);
setup_sprite_tiled_builder(8bpp,_4x);
+#endif
+
+#if !defined(NEON_BUILD) || defined(SIMD_BUILD)
void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
s32 v, s32 width, s32 height, u32 color)
texture_offset_base &= ~0x7;
- sprites_16bpp++;
+ stats_add(sprites_16bpp, 1);
if(block_width == 1)
{
texture_block_ptr =
texture_page_ptr + (texture_offset_base & texture_mask);
- load_128b(block->texels, texture_block_ptr);
+ block->texels = *(vec_8x16u *)texture_block_ptr;
block->draw_mask_bits = mask_bits;
block->fb_ptr = fb_ptr;
texture_offset_base += 1024;
texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask);
- load_128b(block->texels, texture_block_ptr);
+ block->texels = *(vec_8x16u *)texture_block_ptr;
block->draw_mask_bits = left_mask_bits;
block->fb_ptr = fb_ptr;
while(blocks_remaining)
{
texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask);
- load_128b(block->texels, texture_block_ptr);
+ block->texels = *(vec_8x16u *)texture_block_ptr;
block->draw_mask_bits = 0;
block->fb_ptr = fb_ptr;
}
texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask);
- load_128b(block->texels, texture_block_ptr);
+ block->texels = *(vec_8x16u *)texture_block_ptr;
block->draw_mask_bits = right_mask_bits;
block->fb_ptr = fb_ptr;
}
}
+#endif
+
+#ifndef NEON_BUILD
+
void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
s32 v, s32 width, s32 height, u32 color)
{
if((psx_gpu->render_state & (RENDER_STATE_MASK_EVALUATE |
- RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND)) == 0)
+ RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND)) == 0 &&
+ (psx_gpu->render_mode & RENDER_INTERLACE_ENABLED) == 0)
{
setup_sprite_untextured_simple(psx_gpu, x, y, u, v, width, height, color);
return;
render_sprite_blocks_switch_block_texture_mode(4bpp), \
render_sprite_blocks_switch_block_texture_mode(8bpp), \
render_sprite_blocks_switch_block_texture_mode(16bpp), \
- render_sprite_blocks_switch_block_texture_mode(4bpp) \
+ render_sprite_blocks_switch_block_texture_mode(16bpp) \
render_block_handler_struct render_sprite_block_handlers[] =
}
#define draw_pixel_line_mask_evaluate_yes() \
- if(*vram_ptr & 0x8000) \
+ if((*vram_ptr & 0x8000) == 0) \
#define draw_pixel_line_mask_evaluate_no() \
memset(psx_gpu->vram_ptr, 0, sizeof(u16) * 1024 * 512);
initialize_reciprocal_table();
+ psx_gpu->reciprocal_table_ptr = reciprocal_table;
// 00 01 10 11
// 00 0 4 1 5
return (tv.tv_sec * 1000000ULL) + tv.tv_usec;
}
-#ifdef NEON_BUILD
+#if 0 //def NEON_BUILD
u32 get_counter()
{