#include <stdio.h>
#include <stdlib.h>
+#include <stdint.h>
+#include <stddef.h>
#include <string.h>
+#include <assert.h>
#include "common.h"
+#ifndef NEON_BUILD
+#include "vector_ops.h"
+#endif
+#include "psx_gpu_simd.h"
+#include "psx_gpu_offsets.h"
+
+#if 0
+void dump_r_d(const char *name, void *dump);
+void dump_r_q(const char *name, void *dump);
+#define dumprd(n) dump_r_d(#n, n.e)
+#define dumprq(n) dump_r_q(#n, n.e)
+#endif
u32 span_pixels = 0;
u32 span_pixel_blocks = 0;
u32 texture_cache_loads = 0;
u32 false_modulated_blocks = 0;
+#define stats_add(stat, count) // stat += count
+
/* double size for enhancement */
u32 reciprocal_table[512 * 2];
return mask;
}
-void update_texture_cache_region(psx_gpu_struct *psx_gpu, u32 x1, u32 y1,
- u32 x2, u32 y2)
+static void update_texture_cache_region_(psx_gpu_struct *psx_gpu,
+ u32 x1, u32 y1, u32 x2, u32 y2)
{
u32 mask = texture_region_mask(x1, y1, x2, y2);
u32 texture_page;
}
}
-void update_texture_8bpp_cache_slice(psx_gpu_struct *psx_gpu,
- u32 texture_page);
+void update_texture_cache_region(psx_gpu_struct *psx_gpu, u32 x1, u32 y1,
+ u32 x2, u32 y2)
+{ /* Public entry point wrapping update_texture_cache_region_(): a region
+   * whose right edge would exceed column 1023 is processed in pieces. Each
+   * piece is clamped to x2 <= 1023, and any remaining width is handled by a
+   * further call that starts again at column 0. y1/y2 are passed through
+   * unchanged. */
+ s32 w = x2 - x1; /* horizontal extent still to be processed */
+ do
+ {
+ x2 = x1 + w;
+ if (x2 > 1023)
+ x2 = 1023; /* clamp this piece to the last column */
+ update_texture_cache_region_(psx_gpu, x1, y1, x2, y2);
+ w -= x2 - x1; /* subtract what this piece covered */
+ x1 = 0; /* a following piece, if any, starts at column 0 */
+ }
+ while (unlikely(w > 0)); /* hinted as the uncommon case via unlikely() */
+}
#ifndef NEON_BUILD
}
}
-void setup_blocks_shaded_untextured_undithered_unswizzled_indirect(
- psx_gpu_struct *psx_gpu);
-
void flush_render_block_buffer(psx_gpu_struct *psx_gpu)
{
if((psx_gpu->render_mode & RENDER_INTERLACE_ENABLED) &&
}
-void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a,
- vertex_struct *b, vertex_struct *c);
-
#ifndef NEON_BUILD
#define setup_gradient_calculation_input(set, vertex) \
printf("mismatch on %s %s: %x vs %x\n", #_a, #_b, _a, _b) \
+#if !defined(NEON_BUILD) && !defined(NDEBUG)
+static void setup_spans_debug_check(psx_gpu_struct *psx_gpu,
+ edge_data_struct *span_edge_data_element)
+{ /* Debug-only sanity check for one span entry (compiled only when neither
+   * NEON_BUILD nor NDEBUG is defined). Each failed check stores a distinct
+   * code (1, 2 or 3) through a null volatile pointer - presumably to force
+   * a crash that identifies which check failed. */
+ u32 _num_spans = span_edge_data_element - psx_gpu->span_edge_data; /* index of this element in the span array */
+ if (_num_spans > MAX_SPANS)
+ *(volatile int *)0 = 1; /* code 1: element index beyond MAX_SPANS */
+ if (_num_spans < psx_gpu->num_spans) /* field checks only for entries below num_spans */
+ {
+ if(span_edge_data_element->num_blocks > MAX_BLOCKS_PER_ROW)
+ *(volatile int *)0 = 2; /* code 2: too many blocks in this row */
+ if(span_edge_data_element->y >= 2048)
+ *(volatile int *)0 = 3; /* code 3: y out of the accepted range */
+ }
+}
+#else /* NEON_BUILD or NDEBUG: the check expands to nothing */
+#define setup_spans_debug_check(psx_gpu, span_edge_data_element)
+#endif
+
#define setup_spans_prologue_alternate_yes() \
vec_2x64s alternate_x; \
vec_2x64s alternate_dx_dy; \
vec_4x32s alternate_x_32; \
- vec_2x32s alternate_x_16; \
+ vec_4x16u alternate_x_16; \
\
vec_4x16u alternate_select; \
vec_4x16s y_mid_point; \
span_b_offset = psx_gpu->span_b_offset; \
\
vec_8x16u c_0x0001; \
+ vec_4x16u c_max_blocks_per_row; \
\
dup_8x16b(c_0x0001, 0x0001); \
dup_8x16b(left_edge, psx_gpu->viewport_start_x); \
dup_4x16b(c_0x04, 0x04); \
dup_4x16b(c_0x07, 0x07); \
dup_4x16b(c_0xFFFE, 0xFFFE); \
+ dup_4x16b(c_max_blocks_per_row, MAX_BLOCKS_PER_ROW); \
#define compute_edge_delta_x2() \
and_4x16b(span_shift, left_right_x_16.high, c_0x07); \
shl_variable_4x16b(span_shift, c_0xFFFE, span_shift); \
shr_4x16b(left_right_x_16.high, left_right_x_16.high, 3); \
+ min_4x16b(left_right_x_16.high, left_right_x_16.high, c_max_blocks_per_row); \
\
u32 i; \
for(i = 0; i < 4; i++) \
span_edge_data[i].num_blocks = left_right_x_16.high.e[i]; \
span_edge_data[i].right_mask = span_shift.e[i]; \
span_edge_data[i].y = y_x4.e[i]; \
+ setup_spans_debug_check(psx_gpu, &span_edge_data[i]); \
} \
\
span_edge_data += 4; \
\
setup_spans_prologue_b(); \
\
- if(height > 0) \
+ if (height > 512) \
+ height = 512; \
+ if (height > 0) \
{ \
y_x4.e[0] = y_a; \
y_x4.e[1] = y_a + 1; \
\
setup_spans_prologue_b(); \
\
- if(height > 0) \
+ if (height > 512) \
+ height = 512; \
+ if (height > 0) \
{ \
y_x4.e[0] = y_a; \
y_x4.e[1] = y_a - 1; \
setup_spans_up(index_##major, index_##minor, minor, yes) \
-void setup_spans_up_left(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
- vertex_struct *v_b, vertex_struct *v_c);
-void setup_spans_up_right(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
- vertex_struct *v_b, vertex_struct *v_c);
-void setup_spans_down_left(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
- vertex_struct *v_b, vertex_struct *v_c);
-void setup_spans_down_right(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
- vertex_struct *v_b, vertex_struct *v_c);
-void setup_spans_up_a(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
- vertex_struct *v_b, vertex_struct *v_c);
-void setup_spans_up_b(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
- vertex_struct *v_b, vertex_struct *v_c);
-void setup_spans_down_a(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
- vertex_struct *v_b, vertex_struct *v_c);
-void setup_spans_down_b(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
- vertex_struct *v_b, vertex_struct *v_c);
-void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
- vertex_struct *v_b, vertex_struct *v_c);
-
-
#ifndef NEON_BUILD
void setup_spans_up_left(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
setup_spans_prologue_b();
- if(height_minor_a > 0)
+ if (height_minor_a > 512)
+ height_minor_a = 512;
+ if (height_minor_a > 0)
{
y_x4.e[0] = y_a;
y_x4.e[1] = y_a - 1;
setup_spans_clip(increment, no);
}
- if(height_minor_b > 0)
+ if (height_minor_b > 512)
+ height_minor_b = 512;
+ if (height_minor_b > 0)
{
y_x4.e[0] = y_a;
y_x4.e[1] = y_a + 1;
y_x4.e[3] = y_a + 3;
setup_spans_adjust_edges_alternate_no(index_left, index_right);
+ // FIXME: overflow corner case
+ if(psx_gpu->num_spans + height_minor_b == MAX_SPANS)
+ height_minor_b &= ~3;
+
psx_gpu->num_spans += height_minor_b;
- do
+ while(height_minor_b > 0)
{
setup_spans_set_x4(none, down, no);
height_minor_b -= 4;
- } while(height_minor_b > 0);
+ }
}
left_split_triangles++;
} \
#define setup_blocks_add_blocks_direct() \
- texel_blocks_untextured += span_num_blocks; \
+ stats_add(texel_blocks_untextured, span_num_blocks); \
span_pixel_blocks += span_num_blocks \
psx_gpu->num_blocks = num_blocks; \
} \
-void setup_blocks_shaded_textured_dithered_unswizzled_indirect(psx_gpu_struct
- *psx_gpu);
-
-void setup_blocks_shaded_untextured_dithered_unswizzled_indirect(psx_gpu_struct
- *psx_gpu);
-void setup_blocks_shaded_untextured_undithered_unswizzled_indirect(
- psx_gpu_struct *psx_gpu);
-void setup_blocks_shaded_untextured_dithered_unswizzled_direct(psx_gpu_struct
- *psx_gpu);
-void setup_blocks_shaded_untextured_undithered_unswizzled_direct(
- psx_gpu_struct *psx_gpu);
-
-void setup_blocks_unshaded_textured_dithered_unswizzled_indirect(psx_gpu_struct
- *psx_gpu);
-void setup_blocks_unshaded_untextured_undithered_unswizzled_indirect(
- psx_gpu_struct *psx_gpu);
-void setup_blocks_unshaded_untextured_undithered_unswizzled_direct(
- psx_gpu_struct *psx_gpu);
-
-void setup_blocks_shaded_textured_dithered_swizzled_indirect(psx_gpu_struct
- *psx_gpu);
-void setup_blocks_unshaded_textured_dithered_swizzled_indirect(psx_gpu_struct
- *psx_gpu);
-
//setup_blocks_builder(unshaded, untextured, undithered, unswizzled, direct);
setup_blocks_builder(unshaded, untextured, undithered, unswizzled, indirect);
setup_blocks_builder(unshaded, untextured, undithered, unswizzled, direct);
-#endif
-
-void texture_blocks_untextured(psx_gpu_struct *psx_gpu);
-void texture_blocks_4bpp(psx_gpu_struct *psx_gpu);
-void texture_blocks_8bpp(psx_gpu_struct *psx_gpu);
-void texture_blocks_16bpp(psx_gpu_struct *psx_gpu);
-
-#ifndef NEON_BUILD
-
void texture_blocks_untextured(psx_gpu_struct *psx_gpu)
{
if(psx_gpu->primitive_type != PRIMITIVE_TYPE_SPRITE)
- texel_blocks_untextured += psx_gpu->num_blocks;
+ stats_add(texel_blocks_untextured, psx_gpu->num_blocks);
}
void texture_blocks_4bpp(psx_gpu_struct *psx_gpu)
{
block_struct *block = psx_gpu->blocks;
u32 num_blocks = psx_gpu->num_blocks;
- texel_blocks_4bpp += num_blocks;
+ stats_add(texel_blocks_4bpp, num_blocks);
vec_8x8u texels_low;
vec_8x8u texels_high;
block_struct *block = psx_gpu->blocks;
u32 num_blocks = psx_gpu->num_blocks;
- texel_blocks_8bpp += num_blocks;
+ stats_add(texel_blocks_8bpp, num_blocks);
if(psx_gpu->current_texture_mask & psx_gpu->dirty_textures_8bpp_mask)
update_texture_8bpp_cache(psx_gpu);
block_struct *block = psx_gpu->blocks;
u32 num_blocks = psx_gpu->num_blocks;
- texel_blocks_16bpp += num_blocks;
+ stats_add(texel_blocks_16bpp, num_blocks);
vec_8x16u texels;
} \
} \
-void shade_blocks_shaded_textured_modulated_dithered_direct(psx_gpu_struct
- *psx_gpu);
-void shade_blocks_shaded_textured_modulated_undithered_direct(psx_gpu_struct
- *psx_gpu);
-void shade_blocks_unshaded_textured_modulated_dithered_direct(psx_gpu_struct
- *psx_gpu);
-void shade_blocks_unshaded_textured_modulated_undithered_direct(psx_gpu_struct
- *psx_gpu);
-
-void shade_blocks_shaded_textured_modulated_dithered_indirect(psx_gpu_struct
- *psx_gpu);
-void shade_blocks_shaded_textured_modulated_undithered_indirect(psx_gpu_struct
- *psx_gpu);
-void shade_blocks_unshaded_textured_modulated_dithered_indirect(psx_gpu_struct
- *psx_gpu);
-void shade_blocks_unshaded_textured_modulated_undithered_indirect(psx_gpu_struct
- *psx_gpu);
-
-void shade_blocks_textured_unmodulated_indirect(psx_gpu_struct *psx_gpu);
-void shade_blocks_textured_unmodulated_direct(psx_gpu_struct *psx_gpu);
-
#ifndef NEON_BUILD
shade_blocks_textured_modulated_builder(shaded, dithered, direct);
shade_blocks_textured_unmodulated_builder(indirect)
shade_blocks_textured_unmodulated_builder(direct)
-#endif
-
-
-void shade_blocks_unshaded_untextured_indirect(psx_gpu_struct *psx_gpu);
-void shade_blocks_unshaded_untextured_direct(psx_gpu_struct *psx_gpu);
-
-#ifndef NEON_BUILD
-
void shade_blocks_unshaded_untextured_indirect(psx_gpu_struct *psx_gpu)
{
}
} \
} \
-void blend_blocks_textured_average_off(psx_gpu_struct *psx_gpu);
-void blend_blocks_textured_average_on(psx_gpu_struct *psx_gpu);
-void blend_blocks_textured_add_off(psx_gpu_struct *psx_gpu);
-void blend_blocks_textured_add_on(psx_gpu_struct *psx_gpu);
-void blend_blocks_textured_subtract_off(psx_gpu_struct *psx_gpu);
-void blend_blocks_textured_subtract_on(psx_gpu_struct *psx_gpu);
-void blend_blocks_textured_add_fourth_off(psx_gpu_struct *psx_gpu);
-void blend_blocks_textured_add_fourth_on(psx_gpu_struct *psx_gpu);
-
-void blend_blocks_untextured_average_off(psx_gpu_struct *psx_gpu);
-void blend_blocks_untextured_average_on(psx_gpu_struct *psx_gpu);
-void blend_blocks_untextured_add_off(psx_gpu_struct *psx_gpu);
-void blend_blocks_untextured_add_on(psx_gpu_struct *psx_gpu);
-void blend_blocks_untextured_subtract_off(psx_gpu_struct *psx_gpu);
-void blend_blocks_untextured_subtract_on(psx_gpu_struct *psx_gpu);
-void blend_blocks_untextured_add_fourth_off(psx_gpu_struct *psx_gpu);
-void blend_blocks_untextured_add_fourth_on(psx_gpu_struct *psx_gpu);
-
-void blend_blocks_textured_unblended_off(psx_gpu_struct *psx_gpu);
-void blend_blocks_textured_unblended_on(psx_gpu_struct *psx_gpu);
-
#ifndef NEON_BUILD
void blend_blocks_textured_unblended_off(psx_gpu_struct *psx_gpu)
render_blocks_switch_block_texture_mode(4bpp), \
render_blocks_switch_block_texture_mode(8bpp), \
render_blocks_switch_block_texture_mode(16bpp), \
- render_blocks_switch_block_texture_mode(4bpp) \
+ render_blocks_switch_block_texture_mode(16bpp) \
render_block_handler_struct render_triangle_block_handlers[] =
}
}
}
+ assert(psx_gpu->span_edge_data[0].y < 1024u);
u32 render_state = flags &
(RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND |
render_triangle_p(psx_gpu, vertex_ptrs, flags);
}
-
-void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu);
-
-#ifndef NEON_BUILD
+#if !defined(NEON_BUILD) || defined(SIMD_BUILD)
void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu)
{
#endif
-#define setup_sprite_tiled_initialize_4bpp() \
+#define setup_sprite_tiled_initialize_4bpp_clut() \
u16 *clut_ptr = psx_gpu->clut_ptr; \
vec_8x16u clut_a, clut_b; \
vec_16x8u clut_low, clut_high; \
\
load_8x16b(clut_a, clut_ptr); \
load_8x16b(clut_b, clut_ptr + 8); \
- unzip_16x8b(clut_low, clut_high, clut_a, clut_b); \
+ unzip_16x8b(clut_low, clut_high, clut_a, clut_b) \
+
+#define setup_sprite_tiled_initialize_4bpp() \
+ setup_sprite_tiled_initialize_4bpp_clut(); \
\
if(psx_gpu->current_texture_mask & psx_gpu->dirty_textures_4bpp_mask) \
update_texture_4bpp_cache(psx_gpu) \
#define setup_sprite_tile_fetch_texel_block_8bpp(offset) \
- texture_block_ptr = psx_gpu->texture_page_ptr + \
+ texture_block_ptr = (u8 *)psx_gpu->texture_page_ptr + \
((texture_offset + offset) & texture_mask); \
\
load_64b(texels, texture_block_ptr) \
-#define setup_sprite_tile_setup_block_yes(side, offset, texture_mode) \
-
-#define setup_sprite_tile_setup_block_no(side, offset, texture_mode) \
-
#define setup_sprite_tile_add_blocks(tile_num_blocks) \
num_blocks += tile_num_blocks; \
sprite_blocks += tile_num_blocks; \
#define setup_sprite_tile_half_8bpp(edge) \
{ \
- setup_sprite_tile_add_blocks(sub_tile_height * 2); \
+ setup_sprite_tile_add_blocks(sub_tile_height); \
\
while(sub_tile_height) \
{ \
#define setup_sprite_tile_column_edge_post_adjust_full(edge) \
-#define setup_sprite_tile_column_height_single(edge_mode, edge, texture_mode) \
+#define setup_sprite_tile_column_height_single(edge_mode, edge, texture_mode, \
+ x4mode) \
do \
{ \
sub_tile_height = column_data; \
- setup_sprite_tile_column_edge_pre_adjust_##edge_mode(edge); \
- setup_sprite_tile_##edge_mode##_##texture_mode(edge); \
- setup_sprite_tile_column_edge_post_adjust_##edge_mode(edge); \
+ setup_sprite_tile_column_edge_pre_adjust_##edge_mode##x4mode(edge); \
+ setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \
+ setup_sprite_tile_column_edge_post_adjust_##edge_mode##x4mode(edge); \
} while(0) \
-#define setup_sprite_tile_column_height_multi(edge_mode, edge, texture_mode) \
+#define setup_sprite_tile_column_height_multi(edge_mode, edge, texture_mode, \
+ x4mode) \
do \
{ \
u32 tiles_remaining = column_data >> 16; \
sub_tile_height = column_data & 0xFF; \
- setup_sprite_tile_column_edge_pre_adjust_##edge_mode(edge); \
- setup_sprite_tile_##edge_mode##_##texture_mode(edge); \
+ setup_sprite_tile_column_edge_pre_adjust_##edge_mode##x4mode(edge); \
+ setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \
tiles_remaining -= 1; \
\
while(tiles_remaining) \
{ \
sub_tile_height = 16; \
- setup_sprite_tile_##edge_mode##_##texture_mode(edge); \
+ setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \
tiles_remaining--; \
} \
\
sub_tile_height = (column_data >> 8) & 0xFF; \
- setup_sprite_tile_##edge_mode##_##texture_mode(edge); \
- setup_sprite_tile_column_edge_post_adjust_##edge_mode(edge); \
+ setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \
+ setup_sprite_tile_column_edge_post_adjust_##edge_mode##x4mode(edge); \
} while(0) \
column_data |= (tile_height - 1) << 16 \
+#define RIGHT_MASK_BIT_SHIFT 8 /* shift deriving right_mask_bits from left_mask_bits when x4mode is empty */
+#define RIGHT_MASK_BIT_SHIFT_4x 16 /* same shift, selected by token pasting when x4mode is _4x */
+
#define setup_sprite_tile_column_width_single(texture_mode, multi_height, \
- edge_mode, edge) \
+ edge_mode, edge, x4mode) \
{ \
setup_sprite_column_data_##multi_height(); \
left_mask_bits = left_block_mask | right_block_mask; \
- right_mask_bits = left_mask_bits >> 8; \
+ right_mask_bits = left_mask_bits >> RIGHT_MASK_BIT_SHIFT##x4mode; \
\
setup_sprite_tile_column_height_##multi_height(edge_mode, edge, \
- texture_mode); \
+ texture_mode, x4mode); \
} \
#define setup_sprite_tiled_advance_column() \
if((texture_offset_base & 0xF00) == 0) \
texture_offset_base -= (0x100 + 0xF00) \
+#define FB_PTR_MULTIPLIER 1 /* scale applied to fb_ptr_advance_column when x4mode is empty */
+#define FB_PTR_MULTIPLIER_4x 2 /* scale applied when x4mode is _4x (selected by token pasting) */
+
#define setup_sprite_tile_column_width_multi(texture_mode, multi_height, \
- left_mode, right_mode) \
+ left_mode, right_mode, x4mode) \
{ \
setup_sprite_column_data_##multi_height(); \
- s32 fb_ptr_advance_column = 16 - (1024 * height); \
+ s32 fb_ptr_advance_column = (16 - (1024 * height)) \
+ * FB_PTR_MULTIPLIER##x4mode; \
\
tile_width -= 2; \
left_mask_bits = left_block_mask; \
- right_mask_bits = left_mask_bits >> 8; \
+ right_mask_bits = left_mask_bits >> RIGHT_MASK_BIT_SHIFT##x4mode; \
\
setup_sprite_tile_column_height_##multi_height(left_mode, right, \
- texture_mode); \
+ texture_mode, x4mode); \
fb_ptr += fb_ptr_advance_column; \
\
left_mask_bits = 0x00; \
while(tile_width) \
{ \
setup_sprite_tiled_advance_column(); \
- setup_sprite_tile_column_height_##multi_height(full, none, texture_mode); \
+ setup_sprite_tile_column_height_##multi_height(full, none, \
+ texture_mode, x4mode); \
fb_ptr += fb_ptr_advance_column; \
tile_width--; \
} \
\
left_mask_bits = right_block_mask; \
- right_mask_bits = left_mask_bits >> 8; \
+ right_mask_bits = left_mask_bits >> RIGHT_MASK_BIT_SHIFT##x4mode; \
\
setup_sprite_tiled_advance_column(); \
setup_sprite_tile_column_height_##multi_height(right_mode, left, \
- texture_mode); \
+ texture_mode, x4mode); \
+} \
+
+
+/* 4x stuff */
+#define setup_sprite_tiled_initialize_4bpp_4x() \
+ setup_sprite_tiled_initialize_4bpp_clut() \
+
+#define setup_sprite_tiled_initialize_8bpp_4x() \
+
+
+#define setup_sprite_tile_full_4bpp_4x(edge) \
+{ /* full-width tile, 4bpp, 4x variant: emits 8 blocks per loop iteration */ \
+ vec_8x8u texels_low, texels_high; \
+ vec_8x16u pixels, pixels_wide; \
+ setup_sprite_tile_add_blocks(sub_tile_height * 2 * 4); /* 8 blocks per row, accounted up front */ \
+ u32 left_mask_bits_a = left_mask_bits & 0xFF; /* low 8 bits of the left mask */ \
+ u32 left_mask_bits_b = left_mask_bits >> 8; /* bits above the low 8 */ \
+ u32 right_mask_bits_a = right_mask_bits & 0xFF; \
+ u32 right_mask_bits_b = right_mask_bits >> 8; \
+ \
+ while(sub_tile_height) \
+ { \
+ setup_sprite_tile_fetch_texel_block_8bpp(0); /* texel group at offset 0 */ \
+ tbl_16(texels_low, texels, clut_low); /* CLUT lookup; presumably low/high bytes of each colour - confirm */ \
+ tbl_16(texels_high, texels, clut_high); \
+ zip_8x16b(pixels, texels_low, texels_high); \
+ \
+ zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); /* pixels.low zipped with itself */ \
+ block->texels = pixels_wide; \
+ block->draw_mask_bits = left_mask_bits_a; \
+ block->fb_ptr = fb_ptr; \
+ block++; \
+ \
+ block->texels = pixels_wide; /* same texels again, 1024 framebuffer elements further */ \
+ block->draw_mask_bits = left_mask_bits_a; \
+ block->fb_ptr = fb_ptr + 1024; \
+ block++; \
+ \
+ zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.high, pixels.high); \
+ block->texels = pixels_wide; \
+ block->draw_mask_bits = left_mask_bits_b; \
+ block->fb_ptr = fb_ptr + 8; \
+ block++; \
+ \
+ block->texels = pixels_wide; \
+ block->draw_mask_bits = left_mask_bits_b; \
+ block->fb_ptr = fb_ptr + 1024 + 8; \
+ block++; \
+ \
+ setup_sprite_tile_fetch_texel_block_8bpp(8); /* texel group at offset 8, uses the right masks */ \
+ tbl_16(texels_low, texels, clut_low); \
+ tbl_16(texels_high, texels, clut_high); \
+ zip_8x16b(pixels, texels_low, texels_high); \
+ \
+ zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \
+ block->texels = pixels_wide; \
+ block->draw_mask_bits = right_mask_bits_a; \
+ block->fb_ptr = fb_ptr + 16; \
+ block++; \
+ \
+ block->texels = pixels_wide; \
+ block->draw_mask_bits = right_mask_bits_a; \
+ block->fb_ptr = fb_ptr + 1024 + 16; \
+ block++; \
+ \
+ zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.high, pixels.high); \
+ block->texels = pixels_wide; \
+ block->draw_mask_bits = right_mask_bits_b; \
+ block->fb_ptr = fb_ptr + 24; \
+ block++; \
+ \
+ block->texels = pixels_wide; \
+ block->draw_mask_bits = right_mask_bits_b; \
+ block->fb_ptr = fb_ptr + 1024 + 24; \
+ block++; \
+ \
+ fb_ptr += 2048; /* advance by two strides of 1024 per iteration */ \
+ texture_offset += 0x10; \
+ sub_tile_height--; \
+ } \
+ texture_offset += 0xF00; \
+ psx_gpu->num_blocks = num_blocks; /* publish the updated block count */ \
} \
+#define setup_sprite_tile_half_4bpp_4x(edge) \
+{ /* half-width tile, 4bpp, 4x variant: one texel group, 4 blocks per iteration */ \
+ vec_8x8u texels_low, texels_high; \
+ vec_8x16u pixels, pixels_wide; \
+ setup_sprite_tile_add_blocks(sub_tile_height * 4); /* 4 blocks per row, accounted up front */ \
+ u32 edge##_mask_bits_a = edge##_mask_bits & 0xFF; /* edge selects left_ or right_ mask bits */ \
+ u32 edge##_mask_bits_b = edge##_mask_bits >> 8; \
+ \
+ while(sub_tile_height) \
+ { \
+ setup_sprite_tile_fetch_texel_block_8bpp(0); \
+ tbl_16(texels_low, texels, clut_low); /* CLUT lookup; presumably low/high bytes of each colour - confirm */ \
+ tbl_16(texels_high, texels, clut_high); \
+ zip_8x16b(pixels, texels_low, texels_high); \
+ \
+ zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); /* pixels.low zipped with itself */ \
+ block->texels = pixels_wide; \
+ block->draw_mask_bits = edge##_mask_bits_a; \
+ block->fb_ptr = fb_ptr; \
+ block++; \
+ \
+ block->texels = pixels_wide; /* same texels again, 1024 framebuffer elements further */ \
+ block->draw_mask_bits = edge##_mask_bits_a; \
+ block->fb_ptr = fb_ptr + 1024; \
+ block++; \
+ \
+ zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.high, pixels.high); \
+ block->texels = pixels_wide; \
+ block->draw_mask_bits = edge##_mask_bits_b; \
+ block->fb_ptr = fb_ptr + 8; \
+ block++; \
+ \
+ block->texels = pixels_wide; \
+ block->draw_mask_bits = edge##_mask_bits_b; \
+ block->fb_ptr = fb_ptr + 1024 + 8; \
+ block++; \
+ \
+ fb_ptr += 2048; /* advance by two strides of 1024 per iteration */ \
+ texture_offset += 0x10; \
+ sub_tile_height--; \
+ } \
+ texture_offset += 0xF00; \
+ psx_gpu->num_blocks = num_blocks; /* publish the updated block count */ \
+} \
-#define setup_sprite_tiled_builder(texture_mode) \
-void setup_sprite_##texture_mode(psx_gpu_struct *psx_gpu, s32 x, s32 y, \
+
+#define setup_sprite_tile_full_8bpp_4x(edge) \
+{ /* full-width tile, 8bpp, 4x variant: emits 8 blocks per loop iteration */ \
+ setup_sprite_tile_add_blocks(sub_tile_height * 2 * 4); /* 8 blocks per row, accounted up front */ \
+ vec_16x8u texels_wide; \
+ u32 left_mask_bits_a = left_mask_bits & 0xFF; /* low 8 bits of the left mask */ \
+ u32 left_mask_bits_b = left_mask_bits >> 8; /* bits above the low 8 */ \
+ u32 right_mask_bits_a = right_mask_bits & 0xFF; \
+ u32 right_mask_bits_b = right_mask_bits >> 8; \
+ \
+ while(sub_tile_height) \
+ { \
+ setup_sprite_tile_fetch_texel_block_8bpp(0); /* texel group at offset 0 */ \
+ zip_8x16b(vector_cast(vec_8x16u, texels_wide), texels, texels); /* texels zipped with itself */ \
+ block->r = texels_wide.low; /* texel indices are stored in the block's r field */ \
+ block->draw_mask_bits = left_mask_bits_a; \
+ block->fb_ptr = fb_ptr; \
+ block++; \
+ \
+ block->r = texels_wide.low; /* same data again, 1024 framebuffer elements further */ \
+ block->draw_mask_bits = left_mask_bits_a; \
+ block->fb_ptr = fb_ptr + 1024; \
+ block++; \
+ \
+ block->r = texels_wide.high; \
+ block->draw_mask_bits = left_mask_bits_b; \
+ block->fb_ptr = fb_ptr + 8; \
+ block++; \
+ \
+ block->r = texels_wide.high; \
+ block->draw_mask_bits = left_mask_bits_b; \
+ block->fb_ptr = fb_ptr + 1024 + 8; \
+ block++; \
+ \
+ setup_sprite_tile_fetch_texel_block_8bpp(8); /* texel group at offset 8, uses the right masks */ \
+ zip_8x16b(vector_cast(vec_8x16u, texels_wide), texels, texels); \
+ block->r = texels_wide.low; \
+ block->draw_mask_bits = right_mask_bits_a; \
+ block->fb_ptr = fb_ptr + 16; \
+ block++; \
+ \
+ block->r = texels_wide.low; \
+ block->draw_mask_bits = right_mask_bits_a; \
+ block->fb_ptr = fb_ptr + 1024 + 16; \
+ block++; \
+ \
+ block->r = texels_wide.high; \
+ block->draw_mask_bits = right_mask_bits_b; \
+ block->fb_ptr = fb_ptr + 24; \
+ block++; \
+ \
+ block->r = texels_wide.high; \
+ block->draw_mask_bits = right_mask_bits_b; \
+ block->fb_ptr = fb_ptr + 24 + 1024; \
+ block++; \
+ \
+ fb_ptr += 2048; /* advance by two strides of 1024 per iteration */ \
+ texture_offset += 0x10; \
+ sub_tile_height--; \
+ } \
+ texture_offset += 0xF00; \
+ psx_gpu->num_blocks = num_blocks; /* publish the updated block count */ \
+} \
+
+#define setup_sprite_tile_half_8bpp_4x(edge) \
+{ /* half-width tile, 8bpp, 4x variant: one texel group, 4 blocks per iteration */ \
+ setup_sprite_tile_add_blocks(sub_tile_height * 4); /* 4 blocks per row, accounted up front */ \
+ vec_16x8u texels_wide; \
+ u32 edge##_mask_bits_a = edge##_mask_bits & 0xFF; /* edge selects left_ or right_ mask bits */ \
+ u32 edge##_mask_bits_b = edge##_mask_bits >> 8; \
+ \
+ while(sub_tile_height) \
+ { \
+ setup_sprite_tile_fetch_texel_block_8bpp(0); \
+ zip_8x16b(vector_cast(vec_8x16u, texels_wide), texels, texels); /* texels zipped with itself */ \
+ block->r = texels_wide.low; /* texel indices are stored in the block's r field */ \
+ block->draw_mask_bits = edge##_mask_bits_a; \
+ block->fb_ptr = fb_ptr; \
+ block++; \
+ \
+ block->r = texels_wide.low; /* same data again, 1024 framebuffer elements further */ \
+ block->draw_mask_bits = edge##_mask_bits_a; \
+ block->fb_ptr = fb_ptr + 1024; \
+ block++; \
+ \
+ block->r = texels_wide.high; \
+ block->draw_mask_bits = edge##_mask_bits_b; \
+ block->fb_ptr = fb_ptr + 8; \
+ block++; \
+ \
+ block->r = texels_wide.high; \
+ block->draw_mask_bits = edge##_mask_bits_b; \
+ block->fb_ptr = fb_ptr + 8 + 1024; \
+ block++; \
+ \
+ fb_ptr += 2048; /* advance by two strides of 1024 per iteration */ \
+ texture_offset += 0x10; \
+ sub_tile_height--; \
+ } \
+ texture_offset += 0xF00; \
+ psx_gpu->num_blocks = num_blocks; /* publish the updated block count */ \
+} \
+
+
+#define setup_sprite_tile_column_edge_pre_adjust_half_right_4x() \
+ texture_offset = texture_offset_base + 8; /* start at the texel group at offset 8 */ \
+ fb_ptr += 16 /* shift the output by 16 elements; reverted by the matching post-adjust */ \
+
+#define setup_sprite_tile_column_edge_pre_adjust_half_left_4x() \
+ texture_offset = texture_offset_base /* left half: no framebuffer shift */ \
+
+#define setup_sprite_tile_column_edge_pre_adjust_half_4x(edge) \
+ setup_sprite_tile_column_edge_pre_adjust_half_##edge##_4x() /* dispatch on edge (left/right) */ \
+
+#define setup_sprite_tile_column_edge_pre_adjust_full_4x(edge) \
+ texture_offset = texture_offset_base /* edge is unused for full tiles */ \
+
+#define setup_sprite_tile_column_edge_post_adjust_half_right_4x() \
+ fb_ptr -= 16 /* undo the pre-adjust shift */ \
+
+#define setup_sprite_tile_column_edge_post_adjust_half_left_4x() /* nothing to undo */ \
+
+#define setup_sprite_tile_column_edge_post_adjust_half_4x(edge) \
+ setup_sprite_tile_column_edge_post_adjust_half_##edge##_4x() /* dispatch on edge (left/right) */ \
+
+#define setup_sprite_tile_column_edge_post_adjust_full_4x(edge) /* expands to nothing */ \
+
+
+#define setup_sprite_offset_u_adjust() /* empty x4mode: offsets are left unchanged */ \
+
+#define setup_sprite_comapre_left_block_mask() \
+ ((left_block_mask & 0xFF) == 0xFF) /* true when the low 8 bits of the left mask are all set */ \
+
+#define setup_sprite_comapre_right_block_mask() \
+ (((right_block_mask >> 8) & 0xFF) == 0xFF) /* true when bits 8..15 of the right mask are all set */ \
+
+
+#define setup_sprite_offset_u_adjust_4x() \
+ offset_u *= 2; /* _4x mode: double the left offset */ \
+ offset_u_right = offset_u_right * 2 + 1 /* and map the right offset to 2n + 1 */ \
+
+#define setup_sprite_comapre_left_block_mask_4x() \
+ ((left_block_mask & 0xFFFF) == 0xFFFF) /* _4x: tests the low 16 bits instead of 8 */ \
+
+#define setup_sprite_comapre_right_block_mask_4x() \
+ (((right_block_mask >> 16) & 0xFFFF) == 0xFFFF) /* _4x: tests bits 16..31 */ \
+
+
+#define setup_sprite_tiled_builder(texture_mode, x4mode) \
+void setup_sprite_##texture_mode##x4mode(psx_gpu_struct *psx_gpu, s32 x, s32 y,\
s32 u, s32 v, s32 width, s32 height, u32 color) \
{ \
s32 offset_u = u & 0xF; \
s32 tile_width = width_rounded / 16; \
u32 offset_u_right = width_rounded & 0xF; \
\
- u32 left_block_mask = ~(0xFFFF << offset_u); \
- u32 right_block_mask = 0xFFFE << offset_u_right; \
+ setup_sprite_offset_u_adjust##x4mode(); \
+ \
+ u32 left_block_mask = ~(0xFFFFFFFF << offset_u); \
+ u32 right_block_mask = 0xFFFFFFFE << offset_u_right; \
\
u32 left_mask_bits; \
u32 right_mask_bits; \
u32 texture_offset_base = texture_offset; \
u32 control_mask; \
\
- u16 *fb_ptr = psx_gpu->vram_ptr + (y * 1024) + (x - offset_u); \
+ u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + (x - offset_u); \
u32 num_blocks = psx_gpu->num_blocks; \
block_struct *block = psx_gpu->blocks + num_blocks; \
\
- u16 *texture_block_ptr; \
+ u8 *texture_block_ptr; \
vec_8x8u texels; \
\
- setup_sprite_tiled_initialize_##texture_mode(); \
+ setup_sprite_tiled_initialize_##texture_mode##x4mode(); \
\
control_mask = tile_width == 1; \
control_mask |= (tile_height == 1) << 1; \
- control_mask |= ((left_block_mask & 0xFF) == 0xFF) << 2; \
- control_mask |= (((right_block_mask >> 8) & 0xFF) == 0xFF) << 3; \
+ control_mask |= setup_sprite_comapre_left_block_mask##x4mode() << 2; \
+ control_mask |= setup_sprite_comapre_right_block_mask##x4mode() << 3; \
\
sprites_##texture_mode++; \
\
{ \
default: \
case 0x0: \
- setup_sprite_tile_column_width_multi(texture_mode, multi, full, full); \
+ setup_sprite_tile_column_width_multi(texture_mode, multi, full, full, \
+ x4mode); \
break; \
\
case 0x1: \
- setup_sprite_tile_column_width_single(texture_mode, multi, full, none); \
+ setup_sprite_tile_column_width_single(texture_mode, multi, full, none, \
+ x4mode); \
break; \
\
case 0x2: \
- setup_sprite_tile_column_width_multi(texture_mode, single, full, full); \
+ setup_sprite_tile_column_width_multi(texture_mode, single, full, full, \
+ x4mode); \
break; \
\
case 0x3: \
- setup_sprite_tile_column_width_single(texture_mode, single, full, none); \
+ setup_sprite_tile_column_width_single(texture_mode, single, full, none, \
+ x4mode); \
break; \
\
case 0x4: \
- setup_sprite_tile_column_width_multi(texture_mode, multi, half, full); \
+ setup_sprite_tile_column_width_multi(texture_mode, multi, half, full, \
+ x4mode); \
break; \
\
case 0x5: \
- setup_sprite_tile_column_width_single(texture_mode, multi, half, right); \
+ setup_sprite_tile_column_width_single(texture_mode, multi, half, right, \
+ x4mode); \
break; \
\
case 0x6: \
- setup_sprite_tile_column_width_multi(texture_mode, single, half, full); \
+ setup_sprite_tile_column_width_multi(texture_mode, single, half, full, \
+ x4mode); \
break; \
\
case 0x7: \
- setup_sprite_tile_column_width_single(texture_mode, single, half, right);\
+ setup_sprite_tile_column_width_single(texture_mode, single, half, right, \
+ x4mode); \
break; \
\
case 0x8: \
- setup_sprite_tile_column_width_multi(texture_mode, multi, full, half); \
+ setup_sprite_tile_column_width_multi(texture_mode, multi, full, half, \
+ x4mode); \
break; \
\
case 0x9: \
- setup_sprite_tile_column_width_single(texture_mode, multi, half, left); \
+ setup_sprite_tile_column_width_single(texture_mode, multi, half, left, \
+ x4mode); \
break; \
\
case 0xA: \
- setup_sprite_tile_column_width_multi(texture_mode, single, full, half); \
+ setup_sprite_tile_column_width_multi(texture_mode, single, full, half, \
+ x4mode); \
break; \
\
case 0xB: \
- setup_sprite_tile_column_width_single(texture_mode, single, half, left); \
+ setup_sprite_tile_column_width_single(texture_mode, single, half, left, \
+ x4mode); \
break; \
\
case 0xC: \
- setup_sprite_tile_column_width_multi(texture_mode, multi, half, half); \
+ setup_sprite_tile_column_width_multi(texture_mode, multi, half, half, \
+ x4mode); \
break; \
\
case 0xE: \
- setup_sprite_tile_column_width_multi(texture_mode, single, half, half); \
+ setup_sprite_tile_column_width_multi(texture_mode, single, half, half, \
+ x4mode); \
break; \
} \
} \
+#ifndef NEON_BUILD
+setup_sprite_tiled_builder(4bpp,);
+setup_sprite_tiled_builder(8bpp,);
-void setup_sprite_4bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
- s32 width, s32 height, u32 color);
-void setup_sprite_8bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
- s32 width, s32 height, u32 color);
-void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
- s32 width, s32 height, u32 color);
+setup_sprite_tiled_builder(4bpp,_4x);
+setup_sprite_tiled_builder(8bpp,_4x);
+#endif
-#ifndef NEON_BUILD
-setup_sprite_tiled_builder(4bpp);
-setup_sprite_tiled_builder(8bpp);
+#if !defined(NEON_BUILD) || defined(SIMD_BUILD)
void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
s32 v, s32 width, s32 height, u32 color)
texture_offset_base &= ~0x7;
- sprites_16bpp++;
+ stats_add(sprites_16bpp, 1);
if(block_width == 1)
{
texture_block_ptr =
texture_page_ptr + (texture_offset_base & texture_mask);
- load_128b(block->texels, texture_block_ptr);
+ block->texels = *(vec_8x16u *)texture_block_ptr;
block->draw_mask_bits = mask_bits;
block->fb_ptr = fb_ptr;
texture_offset_base += 1024;
texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask);
- load_128b(block->texels, texture_block_ptr);
+ block->texels = *(vec_8x16u *)texture_block_ptr;
block->draw_mask_bits = left_mask_bits;
block->fb_ptr = fb_ptr;
while(blocks_remaining)
{
texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask);
- load_128b(block->texels, texture_block_ptr);
+ block->texels = *(vec_8x16u *)texture_block_ptr;
block->draw_mask_bits = 0;
block->fb_ptr = fb_ptr;
}
texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask);
- load_128b(block->texels, texture_block_ptr);
+ block->texels = *(vec_8x16u *)texture_block_ptr;
block->draw_mask_bits = right_mask_bits;
block->fb_ptr = fb_ptr;
#endif
-void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
+#ifndef NEON_BUILD
+
+void setup_sprite_untextured_512(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
s32 v, s32 width, s32 height, u32 color)
{
u32 right_width = ((width - 1) & 0x7) + 1;
}
}
+#endif
+
+static void __attribute__((noinline))
+setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
+ s32 v, s32 width, s32 height, u32 color)
+{ /* Fills a width x height rectangle at (x, y) in psx_gpu->vram_out_ptr with
+   * a single colour by writing pixels directly, after flushing any blocks
+   * already queued. The u and v parameters are not used here.
+   *
+   * NOTE(review): num_width is unsigned, so width == 0 on a row whose start
+   * is not 4-byte aligned would underflow at num_width--; and while(height)
+   * only stops at exactly zero. Presumably callers always pass positive
+   * width and height - confirm. */
+ u32 r = color & 0xFF;
+ u32 g = (color >> 8) & 0xFF;
+ u32 b = (color >> 16) & 0xFF;
+ u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10) |
+ psx_gpu->mask_msb; /* 5 bits per channel, r lowest, plus the mask bit */
+ u32 color_32bpp = color_16bpp | (color_16bpp << 16); /* two pixels per 32-bit store */
+
+ u16 *vram_ptr16 = psx_gpu->vram_out_ptr + x + (y * 1024); /* start of the first row; rows are 1024 pixels apart */
+ u32 *vram_ptr;
+
+ u32 num_width;
+
+ if(psx_gpu->num_blocks)
+ {
+ flush_render_block_buffer(psx_gpu); /* pending blocks are flushed before the direct writes */
+ }
+
+ while(height)
+ {
+ num_width = width;
+
+ vram_ptr = (void *)vram_ptr16;
+ if((uintptr_t)vram_ptr16 & 2) /* row start not 4-byte aligned: write one pixel first */
+ {
+ *vram_ptr16 = color_32bpp; /* truncated to 16 bits by the store */
+ vram_ptr = (void *)(vram_ptr16 + 1);
+ num_width--;
+ }
+
+ while(num_width >= 4 * 2) /* 8 pixels per iteration as four 32-bit stores */
+ {
+ vram_ptr[0] = color_32bpp;
+ vram_ptr[1] = color_32bpp;
+ vram_ptr[2] = color_32bpp;
+ vram_ptr[3] = color_32bpp;
+
+ vram_ptr += 4;
+ num_width -= 4 * 2;
+ }
+
+ while(num_width >= 2) /* remaining pixel pairs */
+ {
+ *vram_ptr++ = color_32bpp;
+ num_width -= 2;
+ }
+
+ if(num_width > 0) /* one trailing pixel */
+ {
+ *(u16 *)vram_ptr = color_32bpp;
+ }
+
+ vram_ptr16 += 1024; /* next row */
+ height--;
+ }
+}
+
+void setup_sprite_untextured_512(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
+ s32 v, s32 width, s32 height, u32 color);
+
+void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
+ s32 v, s32 width, s32 height, u32 color)
+{ /* Dispatcher for untextured rectangles. When render_state has none of
+   * RENDER_STATE_MASK_EVALUATE, RENDER_FLAGS_MODULATE_TEXELS or
+   * RENDER_FLAGS_BLEND set and interlacing is not enabled, the rectangle is
+   * filled directly by setup_sprite_untextured_simple(). Otherwise it is
+   * split into pieces at most 512 pixels wide, each passed to
+   * setup_sprite_untextured_512() with u and v given as 0. */
+ if((psx_gpu->render_state & (RENDER_STATE_MASK_EVALUATE |
+ RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND)) == 0 &&
+ (psx_gpu->render_mode & RENDER_INTERLACE_ENABLED) == 0)
+ {
+ setup_sprite_untextured_simple(psx_gpu, x, y, u, v, width, height, color);
+ return;
+ }
+
+ while (width > 0)
+ {
+ s32 w1 = width > 512 ? 512 : width; /* width of this piece, capped at 512 */
+ setup_sprite_untextured_512(psx_gpu, x, y, 0, 0, w1, height, color);
+ x += 512; /* next piece starts 512 pixels to the right */
+ width -= 512;
+ }
+}
#define setup_sprite_blocks_switch_textured(texture_mode) \
render_sprite_blocks_switch_block_texture_mode(4bpp), \
render_sprite_blocks_switch_block_texture_mode(8bpp), \
render_sprite_blocks_switch_block_texture_mode(16bpp), \
- render_sprite_blocks_switch_block_texture_mode(4bpp) \
+ render_sprite_blocks_switch_block_texture_mode(16bpp) \
render_block_handler_struct render_sprite_block_handlers[] =
}
#define draw_pixel_line_mask_evaluate_yes() \
- if(*vram_ptr & 0x8000) \
+ if((*vram_ptr & 0x8000) == 0) \
#define draw_pixel_line_mask_evaluate_no() \
if(vertex_a->x >= vertex_b->x)
{
vertex_swap(vertex_a, vertex_b);
+ (void)triangle_winding;
}
x_a = vertex_a->x;
}
}
+#ifndef PCSX
void render_block_copy(psx_gpu_struct *psx_gpu, u16 *source, u32 x, u32 y,
u32 width, u32 height, u32 pitch)
{
render_block_copy(psx_gpu, psx_gpu->vram_ptr + source_x + (source_y * 1024),
dest_x, dest_y, width, height, 1024);
}
-
+#endif
void initialize_reciprocal_table(void)
{
{
shift = __builtin_clz(height);
height_normalized = height << shift;
- height_reciprocal = ((1ULL << 52) + (height_normalized - 1)) /
+ height_reciprocal = ((1ULL << 51) + (height_normalized - 1)) /
height_normalized;
- shift = 32 - (52 - shift);
+ shift = 32 - (51 - shift);
reciprocal_table[height] = (height_reciprocal << 10) | shift;
}
psx_gpu->texture_page_ptr = psx_gpu->vram_ptr;
psx_gpu->clut_ptr = psx_gpu->vram_ptr;
+ psx_gpu->viewport_start_x = psx_gpu->viewport_start_y = 0;
+ psx_gpu->viewport_end_x = psx_gpu->viewport_end_y = 0;
psx_gpu->mask_msb = 0;
psx_gpu->texture_window_x = 0;
memset(psx_gpu->vram_ptr, 0, sizeof(u16) * 1024 * 512);
initialize_reciprocal_table();
+ psx_gpu->reciprocal_table_ptr = reciprocal_table;
// 00 01 10 11
// 00 0 4 1 5
psx_gpu->primitive_type = PRIMITIVE_TYPE_UNKNOWN;
- psx_gpu->enhancement_x_threshold = 256;
+ psx_gpu->saved_hres = 256;
+
+ // check some offset
+ psx_gpu->reserved_a[(offsetof(psx_gpu_struct, blocks) == psx_gpu_blocks_offset) - 1] = 0;
}
u64 get_us(void)
return (tv.tv_sec * 1000000ULL) + tv.tv_usec;
}
-#ifdef NEON_BUILD
+#if 0 //def NEON_BUILD
u32 get_counter()
{
#endif
#include "psx_gpu_4x.c"
+
+// vim:ts=2:sw=2:expandtab