X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=plugins%2Fgpu_neon%2Fpsx_gpu%2Fpsx_gpu.c;h=b671a757bd92581b5dd28473ea94dfac7475e8ad;hb=HEAD;hp=28ebcf51e2b6cda5b7ce1a39d71a3f9d16ee783e;hpb=c6063f8985c69362a89a12111f393229ab65d05f;p=pcsx_rearmed.git diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index 28ebcf51..b671a757 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -14,9 +14,24 @@ #include #include +#include +#include #include +#include #include "common.h" +#ifndef NEON_BUILD +#include "vector_ops.h" +#endif +#include "psx_gpu_simd.h" +#include "psx_gpu_offsets.h" + +#if 0 +void dump_r_d(const char *name, void *dump); +void dump_r_q(const char *name, void *dump); +#define dumprd(n) dump_r_d(#n, n.e) +#define dumprq(n) dump_r_q(#n, n.e) +#endif u32 span_pixels = 0; u32 span_pixel_blocks = 0; @@ -47,6 +62,8 @@ u32 zero_block_spans = 0; u32 texture_cache_loads = 0; u32 false_modulated_blocks = 0; +#define stats_add(stat, count) // stat += count + /* double size for enhancement */ u32 reciprocal_table[512 * 2]; @@ -245,8 +262,8 @@ u32 invalidate_texture_cache_region_viewport(psx_gpu_struct *psx_gpu, u32 x1, return mask; } -void update_texture_cache_region(psx_gpu_struct *psx_gpu, u32 x1, u32 y1, - u32 x2, u32 y2) +static void update_texture_cache_region_(psx_gpu_struct *psx_gpu, + u32 x1, u32 y1, u32 x2, u32 y2) { u32 mask = texture_region_mask(x1, y1, x2, y2); u32 texture_page; @@ -298,8 +315,21 @@ void update_texture_cache_region(psx_gpu_struct *psx_gpu, u32 x1, u32 y1, } } -void update_texture_8bpp_cache_slice(psx_gpu_struct *psx_gpu, - u32 texture_page); +void update_texture_cache_region(psx_gpu_struct *psx_gpu, u32 x1, u32 y1, + u32 x2, u32 y2) +{ + s32 w = x2 - x1; + do + { + x2 = x1 + w; + if (x2 > 1023) + x2 = 1023; + update_texture_cache_region_(psx_gpu, x1, y1, x2, y2); + w -= x2 - x1; + x1 = 0; + } + while (unlikely(w > 0)); +} #ifndef NEON_BUILD @@ -449,9 +479,6 @@ void update_texture_8bpp_cache(psx_gpu_struct *psx_gpu) } } -void setup_blocks_shaded_untextured_undithered_unswizzled_indirect( - psx_gpu_struct *psx_gpu); - void flush_render_block_buffer(psx_gpu_struct *psx_gpu) { if((psx_gpu->render_mode & RENDER_INTERLACE_ENABLED) && @@ -515,9 +542,6 @@ void flush_render_block_buffer(psx_gpu_struct *psx_gpu) } -void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, - vertex_struct *b, vertex_struct *c); - #ifndef NEON_BUILD #define setup_gradient_calculation_input(set, vertex) \ @@ -767,11 +791,30 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, printf("mismatch on %s %s: %x vs %x\n", #_a, #_b, _a, _b) \ +#if !defined(NEON_BUILD) && !defined(NDEBUG) +static void setup_spans_debug_check(psx_gpu_struct *psx_gpu, + edge_data_struct *span_edge_data_element) +{ + u32 _num_spans = span_edge_data_element - psx_gpu->span_edge_data; + if (_num_spans > MAX_SPANS) + *(volatile int *)0 = 1; + if (_num_spans < psx_gpu->num_spans) + { + if(span_edge_data_element->num_blocks > MAX_BLOCKS_PER_ROW) + *(volatile int *)0 = 2; + if(span_edge_data_element->y >= 2048) + *(volatile int *)0 = 3; + } +} +#else +#define setup_spans_debug_check(psx_gpu, span_edge_data_element) +#endif + #define setup_spans_prologue_alternate_yes() \ vec_2x64s alternate_x; \ vec_2x64s alternate_dx_dy; \ vec_4x32s alternate_x_32; \ - vec_2x32s alternate_x_16; \ + vec_4x16u alternate_x_16; \ \ vec_4x16u alternate_select; \ vec_4x16s y_mid_point; \ @@ -831,6 +874,7 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, span_b_offset = psx_gpu->span_b_offset; \ \ vec_8x16u c_0x0001; \ + vec_4x16u c_max_blocks_per_row; \ \ dup_8x16b(c_0x0001, 0x0001); \ dup_8x16b(left_edge, psx_gpu->viewport_start_x); \ @@ -839,6 +883,7 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, dup_4x16b(c_0x04, 0x04); \ dup_4x16b(c_0x07, 0x07); \ dup_4x16b(c_0xFFFE, 0xFFFE); \ + dup_4x16b(c_max_blocks_per_row, MAX_BLOCKS_PER_ROW); \ #define compute_edge_delta_x2() \ @@ -1062,6 +1107,7 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, and_4x16b(span_shift, left_right_x_16.high, c_0x07); \ shl_variable_4x16b(span_shift, c_0xFFFE, span_shift); \ shr_4x16b(left_right_x_16.high, left_right_x_16.high, 3); \ + min_4x16b(left_right_x_16.high, left_right_x_16.high, c_max_blocks_per_row); \ \ u32 i; \ for(i = 0; i < 4; i++) \ @@ -1070,6 +1116,7 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, span_edge_data[i].num_blocks = left_right_x_16.high.e[i]; \ span_edge_data[i].right_mask = span_shift.e[i]; \ span_edge_data[i].y = y_x4.e[i]; \ + setup_spans_debug_check(psx_gpu, &span_edge_data[i]); \ } \ \ span_edge_data += 4; \ @@ -1099,7 +1146,9 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, \ setup_spans_prologue_b(); \ \ - if(height > 0) \ + if (height > 512) \ + height = 512; \ + if (height > 0) \ { \ y_x4.e[0] = y_a; \ y_x4.e[1] = y_a + 1; \ @@ -1147,7 +1196,9 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, \ setup_spans_prologue_b(); \ \ - if(height > 0) \ + if (height > 512) \ + height = 512; \ + if (height > 0) \ { \ y_x4.e[0] = y_a; \ y_x4.e[1] = y_a - 1; \ @@ -1184,26 +1235,6 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, setup_spans_up(index_##major, index_##minor, minor, yes) \ -void setup_spans_up_left(psx_gpu_struct *psx_gpu, vertex_struct *v_a, - vertex_struct *v_b, vertex_struct *v_c); -void setup_spans_up_right(psx_gpu_struct *psx_gpu, vertex_struct *v_a, - vertex_struct *v_b, vertex_struct *v_c); -void setup_spans_down_left(psx_gpu_struct *psx_gpu, vertex_struct *v_a, - vertex_struct *v_b, vertex_struct *v_c); -void setup_spans_down_right(psx_gpu_struct *psx_gpu, vertex_struct *v_a, - vertex_struct *v_b, vertex_struct *v_c); -void setup_spans_up_a(psx_gpu_struct *psx_gpu, vertex_struct *v_a, - vertex_struct *v_b, vertex_struct *v_c); -void setup_spans_up_b(psx_gpu_struct *psx_gpu, vertex_struct *v_a, - vertex_struct *v_b, vertex_struct *v_c); -void setup_spans_down_a(psx_gpu_struct *psx_gpu, vertex_struct *v_a, - vertex_struct *v_b, vertex_struct *v_c); -void setup_spans_down_b(psx_gpu_struct *psx_gpu, vertex_struct *v_a, - vertex_struct *v_b, vertex_struct *v_c); -void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, - vertex_struct *v_b, vertex_struct *v_c); - - #ifndef NEON_BUILD void setup_spans_up_left(psx_gpu_struct *psx_gpu, vertex_struct *v_a, @@ -1357,7 +1388,9 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, setup_spans_prologue_b(); - if(height_minor_a > 0) + if (height_minor_a > 512) + height_minor_a = 512; + if (height_minor_a > 0) { y_x4.e[0] = y_a; y_x4.e[1] = y_a - 1; @@ -1399,7 +1432,9 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, setup_spans_clip(increment, no); } - if(height_minor_b > 0) + if (height_minor_b > 512) + height_minor_b = 512; + if (height_minor_b > 0) { y_x4.e[0] = y_a; y_x4.e[1] = y_a + 1; @@ -1407,12 +1442,16 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, y_x4.e[3] = y_a + 3; setup_spans_adjust_edges_alternate_no(index_left, index_right); + // FIXME: overflow corner case + if(psx_gpu->num_spans + height_minor_b == MAX_SPANS) + height_minor_b &= ~3; + psx_gpu->num_spans += height_minor_b; - do + while(height_minor_b > 0) { setup_spans_set_x4(none, down, no); height_minor_b -= 4; - } while(height_minor_b > 0); + } } left_split_triangles++; @@ -1842,7 +1881,7 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, } \ #define setup_blocks_add_blocks_direct() \ - texel_blocks_untextured += span_num_blocks; \ + stats_add(texel_blocks_untextured, span_num_blocks); \ span_pixel_blocks += span_num_blocks \ @@ -1916,30 +1955,6 @@ void setup_blocks_##shading##_##texturing##_##dithering##_##sw##_##target( \ psx_gpu->num_blocks = num_blocks; \ } \ -void setup_blocks_shaded_textured_dithered_unswizzled_indirect(psx_gpu_struct - *psx_gpu); - -void setup_blocks_shaded_untextured_dithered_unswizzled_indirect(psx_gpu_struct - *psx_gpu); -void setup_blocks_shaded_untextured_undithered_unswizzled_indirect( - psx_gpu_struct *psx_gpu); -void setup_blocks_shaded_untextured_dithered_unswizzled_direct(psx_gpu_struct - *psx_gpu); -void setup_blocks_shaded_untextured_undithered_unswizzled_direct( - psx_gpu_struct *psx_gpu); - -void setup_blocks_unshaded_textured_dithered_unswizzled_indirect(psx_gpu_struct - *psx_gpu); -void setup_blocks_unshaded_untextured_undithered_unswizzled_indirect( - psx_gpu_struct *psx_gpu); -void setup_blocks_unshaded_untextured_undithered_unswizzled_direct( - psx_gpu_struct *psx_gpu); - -void setup_blocks_shaded_textured_dithered_swizzled_indirect(psx_gpu_struct - *psx_gpu); -void setup_blocks_unshaded_textured_dithered_swizzled_indirect(psx_gpu_struct - *psx_gpu); - //setup_blocks_builder(unshaded, untextured, undithered, unswizzled, direct); @@ -1959,26 +1974,17 @@ setup_blocks_builder(shaded, untextured, dithered, unswizzled, direct); setup_blocks_builder(unshaded, untextured, undithered, unswizzled, indirect); setup_blocks_builder(unshaded, untextured, undithered, unswizzled, direct); -#endif - -void texture_blocks_untextured(psx_gpu_struct *psx_gpu); -void texture_blocks_4bpp(psx_gpu_struct *psx_gpu); -void texture_blocks_8bpp(psx_gpu_struct *psx_gpu); -void texture_blocks_16bpp(psx_gpu_struct *psx_gpu); - -#ifndef NEON_BUILD - void texture_blocks_untextured(psx_gpu_struct *psx_gpu) { if(psx_gpu->primitive_type != PRIMITIVE_TYPE_SPRITE) - texel_blocks_untextured += psx_gpu->num_blocks; + stats_add(texel_blocks_untextured, psx_gpu->num_blocks); } void texture_blocks_4bpp(psx_gpu_struct *psx_gpu) { block_struct *block = psx_gpu->blocks; u32 num_blocks = psx_gpu->num_blocks; - texel_blocks_4bpp += num_blocks; + stats_add(texel_blocks_4bpp, num_blocks); vec_8x8u texels_low; vec_8x8u texels_high; @@ -2030,7 +2036,7 @@ void texture_blocks_8bpp(psx_gpu_struct *psx_gpu) block_struct *block = psx_gpu->blocks; u32 num_blocks = psx_gpu->num_blocks; - texel_blocks_8bpp += num_blocks; + stats_add(texel_blocks_8bpp, num_blocks); if(psx_gpu->current_texture_mask & psx_gpu->dirty_textures_8bpp_mask) update_texture_8bpp_cache(psx_gpu); @@ -2064,7 +2070,7 @@ void texture_blocks_16bpp(psx_gpu_struct *psx_gpu) block_struct *block = psx_gpu->blocks; u32 num_blocks = psx_gpu->num_blocks; - texel_blocks_16bpp += num_blocks; + stats_add(texel_blocks_16bpp, num_blocks); vec_8x16u texels; @@ -2252,27 +2258,6 @@ void shade_blocks_##shading##_textured_modulated_##dithering##_##target( \ } \ } \ -void shade_blocks_shaded_textured_modulated_dithered_direct(psx_gpu_struct - *psx_gpu); -void shade_blocks_shaded_textured_modulated_undithered_direct(psx_gpu_struct - *psx_gpu); -void shade_blocks_unshaded_textured_modulated_dithered_direct(psx_gpu_struct - *psx_gpu); -void shade_blocks_unshaded_textured_modulated_undithered_direct(psx_gpu_struct - *psx_gpu); - -void shade_blocks_shaded_textured_modulated_dithered_indirect(psx_gpu_struct - *psx_gpu); -void shade_blocks_shaded_textured_modulated_undithered_indirect(psx_gpu_struct - *psx_gpu); -void shade_blocks_unshaded_textured_modulated_dithered_indirect(psx_gpu_struct - *psx_gpu); -void shade_blocks_unshaded_textured_modulated_undithered_indirect(psx_gpu_struct - *psx_gpu); - -void shade_blocks_textured_unmodulated_indirect(psx_gpu_struct *psx_gpu); -void shade_blocks_textured_unmodulated_direct(psx_gpu_struct *psx_gpu); - #ifndef NEON_BUILD shade_blocks_textured_modulated_builder(shaded, dithered, direct); @@ -2358,14 +2343,6 @@ void shade_blocks_textured_unmodulated_dithered_##target(psx_gpu_struct \ shade_blocks_textured_unmodulated_builder(indirect) shade_blocks_textured_unmodulated_builder(direct) -#endif - - -void shade_blocks_unshaded_untextured_indirect(psx_gpu_struct *psx_gpu); -void shade_blocks_unshaded_untextured_direct(psx_gpu_struct *psx_gpu); - -#ifndef NEON_BUILD - void shade_blocks_unshaded_untextured_indirect(psx_gpu_struct *psx_gpu) { } @@ -2577,27 +2554,6 @@ void \ } \ } \ -void blend_blocks_textured_average_off(psx_gpu_struct *psx_gpu); -void blend_blocks_textured_average_on(psx_gpu_struct *psx_gpu); -void blend_blocks_textured_add_off(psx_gpu_struct *psx_gpu); -void blend_blocks_textured_add_on(psx_gpu_struct *psx_gpu); -void blend_blocks_textured_subtract_off(psx_gpu_struct *psx_gpu); -void blend_blocks_textured_subtract_on(psx_gpu_struct *psx_gpu); -void blend_blocks_textured_add_fourth_off(psx_gpu_struct *psx_gpu); -void blend_blocks_textured_add_fourth_on(psx_gpu_struct *psx_gpu); - -void blend_blocks_untextured_average_off(psx_gpu_struct *psx_gpu); -void blend_blocks_untextured_average_on(psx_gpu_struct *psx_gpu); -void blend_blocks_untextured_add_off(psx_gpu_struct *psx_gpu); -void blend_blocks_untextured_add_on(psx_gpu_struct *psx_gpu); -void blend_blocks_untextured_subtract_off(psx_gpu_struct *psx_gpu); -void blend_blocks_untextured_subtract_on(psx_gpu_struct *psx_gpu); -void blend_blocks_untextured_add_fourth_off(psx_gpu_struct *psx_gpu); -void blend_blocks_untextured_add_fourth_on(psx_gpu_struct *psx_gpu); - -void blend_blocks_textured_unblended_off(psx_gpu_struct *psx_gpu); -void blend_blocks_textured_unblended_on(psx_gpu_struct *psx_gpu); - #ifndef NEON_BUILD void blend_blocks_textured_unblended_off(psx_gpu_struct *psx_gpu) @@ -2860,7 +2816,7 @@ blend_blocks_builder(textured, unblended, on); render_blocks_switch_block_texture_mode(4bpp), \ render_blocks_switch_block_texture_mode(8bpp), \ render_blocks_switch_block_texture_mode(16bpp), \ - render_blocks_switch_block_texture_mode(4bpp) \ + render_blocks_switch_block_texture_mode(16bpp) \ render_block_handler_struct render_triangle_block_handlers[] = @@ -3118,14 +3074,7 @@ static void render_triangle_p(psx_gpu_struct *psx_gpu, } } } - if(psx_gpu->render_mode & RENDER_DOUBLE_MODE) - { - u32 i; - for(i = 0; i < psx_gpu->num_spans; i++) - { - psx_gpu->span_edge_data[i].y *= 2; - } - } + assert(psx_gpu->span_edge_data[0].y < 1024u); u32 render_state = flags & (RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND | @@ -3158,10 +3107,7 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, render_triangle_p(psx_gpu, vertex_ptrs, flags); } - -void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu); - -#ifndef NEON_BUILD +#if !defined(NEON_BUILD) || defined(SIMD_BUILD) void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu) { @@ -3193,14 +3139,17 @@ void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu) #endif -#define setup_sprite_tiled_initialize_4bpp() \ +#define setup_sprite_tiled_initialize_4bpp_clut() \ u16 *clut_ptr = psx_gpu->clut_ptr; \ vec_8x16u clut_a, clut_b; \ vec_16x8u clut_low, clut_high; \ \ load_8x16b(clut_a, clut_ptr); \ load_8x16b(clut_b, clut_ptr + 8); \ - unzip_16x8b(clut_low, clut_high, clut_a, clut_b); \ + unzip_16x8b(clut_low, clut_high, clut_a, clut_b) \ + +#define setup_sprite_tiled_initialize_4bpp() \ + setup_sprite_tiled_initialize_4bpp_clut(); \ \ if(psx_gpu->current_texture_mask & psx_gpu->dirty_textures_4bpp_mask) \ update_texture_4bpp_cache(psx_gpu) \ @@ -3211,16 +3160,12 @@ void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu) #define setup_sprite_tile_fetch_texel_block_8bpp(offset) \ - texture_block_ptr = psx_gpu->texture_page_ptr + \ + texture_block_ptr = (u8 *)psx_gpu->texture_page_ptr + \ ((texture_offset + offset) & texture_mask); \ \ load_64b(texels, texture_block_ptr) \ -#define setup_sprite_tile_setup_block_yes(side, offset, texture_mode) \ - -#define setup_sprite_tile_setup_block_no(side, offset, texture_mode) \ - #define setup_sprite_tile_add_blocks(tile_num_blocks) \ num_blocks += tile_num_blocks; \ sprite_blocks += tile_num_blocks; \ @@ -3323,7 +3268,7 @@ void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu) #define setup_sprite_tile_half_8bpp(edge) \ { \ - setup_sprite_tile_add_blocks(sub_tile_height * 2); \ + setup_sprite_tile_add_blocks(sub_tile_height); \ \ while(sub_tile_height) \ { \ @@ -3366,34 +3311,36 @@ void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu) #define setup_sprite_tile_column_edge_post_adjust_full(edge) \ -#define setup_sprite_tile_column_height_single(edge_mode, edge, texture_mode) \ +#define setup_sprite_tile_column_height_single(edge_mode, edge, texture_mode, \ + x4mode) \ do \ { \ sub_tile_height = column_data; \ - setup_sprite_tile_column_edge_pre_adjust_##edge_mode(edge); \ - setup_sprite_tile_##edge_mode##_##texture_mode(edge); \ - setup_sprite_tile_column_edge_post_adjust_##edge_mode(edge); \ + setup_sprite_tile_column_edge_pre_adjust_##edge_mode##x4mode(edge); \ + setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \ + setup_sprite_tile_column_edge_post_adjust_##edge_mode##x4mode(edge); \ } while(0) \ -#define setup_sprite_tile_column_height_multi(edge_mode, edge, texture_mode) \ +#define setup_sprite_tile_column_height_multi(edge_mode, edge, texture_mode, \ + x4mode) \ do \ { \ u32 tiles_remaining = column_data >> 16; \ sub_tile_height = column_data & 0xFF; \ - setup_sprite_tile_column_edge_pre_adjust_##edge_mode(edge); \ - setup_sprite_tile_##edge_mode##_##texture_mode(edge); \ + setup_sprite_tile_column_edge_pre_adjust_##edge_mode##x4mode(edge); \ + setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \ tiles_remaining -= 1; \ \ while(tiles_remaining) \ { \ sub_tile_height = 16; \ - setup_sprite_tile_##edge_mode##_##texture_mode(edge); \ + setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \ tiles_remaining--; \ } \ \ sub_tile_height = (column_data >> 8) & 0xFF; \ - setup_sprite_tile_##edge_mode##_##texture_mode(edge); \ - setup_sprite_tile_column_edge_post_adjust_##edge_mode(edge); \ + setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \ + setup_sprite_tile_column_edge_post_adjust_##edge_mode##x4mode(edge); \ } while(0) \ @@ -3406,15 +3353,18 @@ do \ column_data |= (tile_height - 1) << 16 \ +#define RIGHT_MASK_BIT_SHIFT 8 +#define RIGHT_MASK_BIT_SHIFT_4x 16 + #define setup_sprite_tile_column_width_single(texture_mode, multi_height, \ - edge_mode, edge) \ + edge_mode, edge, x4mode) \ { \ setup_sprite_column_data_##multi_height(); \ left_mask_bits = left_block_mask | right_block_mask; \ - right_mask_bits = left_mask_bits >> 8; \ + right_mask_bits = left_mask_bits >> RIGHT_MASK_BIT_SHIFT##x4mode; \ \ setup_sprite_tile_column_height_##multi_height(edge_mode, edge, \ - texture_mode); \ + texture_mode, x4mode); \ } \ #define setup_sprite_tiled_advance_column() \ @@ -3422,18 +3372,22 @@ do \ if((texture_offset_base & 0xF00) == 0) \ texture_offset_base -= (0x100 + 0xF00) \ +#define FB_PTR_MULTIPLIER 1 +#define FB_PTR_MULTIPLIER_4x 2 + #define setup_sprite_tile_column_width_multi(texture_mode, multi_height, \ - left_mode, right_mode) \ + left_mode, right_mode, x4mode) \ { \ setup_sprite_column_data_##multi_height(); \ - s32 fb_ptr_advance_column = 16 - (1024 * height); \ + s32 fb_ptr_advance_column = (16 - (1024 * height)) \ + * FB_PTR_MULTIPLIER##x4mode; \ \ tile_width -= 2; \ left_mask_bits = left_block_mask; \ - right_mask_bits = left_mask_bits >> 8; \ + right_mask_bits = left_mask_bits >> RIGHT_MASK_BIT_SHIFT##x4mode; \ \ setup_sprite_tile_column_height_##multi_height(left_mode, right, \ - texture_mode); \ + texture_mode, x4mode); \ fb_ptr += fb_ptr_advance_column; \ \ left_mask_bits = 0x00; \ @@ -3442,22 +3396,297 @@ do \ while(tile_width) \ { \ setup_sprite_tiled_advance_column(); \ - setup_sprite_tile_column_height_##multi_height(full, none, texture_mode); \ + setup_sprite_tile_column_height_##multi_height(full, none, \ + texture_mode, x4mode); \ fb_ptr += fb_ptr_advance_column; \ tile_width--; \ } \ \ left_mask_bits = right_block_mask; \ - right_mask_bits = left_mask_bits >> 8; \ + right_mask_bits = left_mask_bits >> RIGHT_MASK_BIT_SHIFT##x4mode; \ \ setup_sprite_tiled_advance_column(); \ setup_sprite_tile_column_height_##multi_height(right_mode, left, \ - texture_mode); \ + texture_mode, x4mode); \ +} \ + + +/* 4x stuff */ +#define setup_sprite_tiled_initialize_4bpp_4x() \ + setup_sprite_tiled_initialize_4bpp_clut() \ + +#define setup_sprite_tiled_initialize_8bpp_4x() \ + + +#define setup_sprite_tile_full_4bpp_4x(edge) \ +{ \ + vec_8x8u texels_low, texels_high; \ + vec_8x16u pixels, pixels_wide; \ + setup_sprite_tile_add_blocks(sub_tile_height * 2 * 4); \ + u32 left_mask_bits_a = left_mask_bits & 0xFF; \ + u32 left_mask_bits_b = left_mask_bits >> 8; \ + u32 right_mask_bits_a = right_mask_bits & 0xFF; \ + u32 right_mask_bits_b = right_mask_bits >> 8; \ + \ + while(sub_tile_height) \ + { \ + setup_sprite_tile_fetch_texel_block_8bpp(0); \ + tbl_16(texels_low, texels, clut_low); \ + tbl_16(texels_high, texels, clut_high); \ + zip_8x16b(pixels, texels_low, texels_high); \ + \ + zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \ + block->texels = pixels_wide; \ + block->draw_mask_bits = left_mask_bits_a; \ + block->fb_ptr = fb_ptr; \ + block++; \ + \ + block->texels = pixels_wide; \ + block->draw_mask_bits = left_mask_bits_a; \ + block->fb_ptr = fb_ptr + 1024; \ + block++; \ + \ + zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.high, pixels.high); \ + block->texels = pixels_wide; \ + block->draw_mask_bits = left_mask_bits_b; \ + block->fb_ptr = fb_ptr + 8; \ + block++; \ + \ + block->texels = pixels_wide; \ + block->draw_mask_bits = left_mask_bits_b; \ + block->fb_ptr = fb_ptr + 1024 + 8; \ + block++; \ + \ + setup_sprite_tile_fetch_texel_block_8bpp(8); \ + tbl_16(texels_low, texels, clut_low); \ + tbl_16(texels_high, texels, clut_high); \ + zip_8x16b(pixels, texels_low, texels_high); \ + \ + zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \ + block->texels = pixels_wide; \ + block->draw_mask_bits = right_mask_bits_a; \ + block->fb_ptr = fb_ptr + 16; \ + block++; \ + \ + block->texels = pixels_wide; \ + block->draw_mask_bits = right_mask_bits_a; \ + block->fb_ptr = fb_ptr + 1024 + 16; \ + block++; \ + \ + zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.high, pixels.high); \ + block->texels = pixels_wide; \ + block->draw_mask_bits = right_mask_bits_b; \ + block->fb_ptr = fb_ptr + 24; \ + block++; \ + \ + block->texels = pixels_wide; \ + block->draw_mask_bits = right_mask_bits_b; \ + block->fb_ptr = fb_ptr + 1024 + 24; \ + block++; \ + \ + fb_ptr += 2048; \ + texture_offset += 0x10; \ + sub_tile_height--; \ + } \ + texture_offset += 0xF00; \ + psx_gpu->num_blocks = num_blocks; \ +} \ + +#define setup_sprite_tile_half_4bpp_4x(edge) \ +{ \ + vec_8x8u texels_low, texels_high; \ + vec_8x16u pixels, pixels_wide; \ + setup_sprite_tile_add_blocks(sub_tile_height * 4); \ + u32 edge##_mask_bits_a = edge##_mask_bits & 0xFF; \ + u32 edge##_mask_bits_b = edge##_mask_bits >> 8; \ + \ + while(sub_tile_height) \ + { \ + setup_sprite_tile_fetch_texel_block_8bpp(0); \ + tbl_16(texels_low, texels, clut_low); \ + tbl_16(texels_high, texels, clut_high); \ + zip_8x16b(pixels, texels_low, texels_high); \ + \ + zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \ + block->texels = pixels_wide; \ + block->draw_mask_bits = edge##_mask_bits_a; \ + block->fb_ptr = fb_ptr; \ + block++; \ + \ + block->texels = pixels_wide; \ + block->draw_mask_bits = edge##_mask_bits_a; \ + block->fb_ptr = fb_ptr + 1024; \ + block++; \ + \ + zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.high, pixels.high); \ + block->texels = pixels_wide; \ + block->draw_mask_bits = edge##_mask_bits_b; \ + block->fb_ptr = fb_ptr + 8; \ + block++; \ + \ + block->texels = pixels_wide; \ + block->draw_mask_bits = edge##_mask_bits_b; \ + block->fb_ptr = fb_ptr + 1024 + 8; \ + block++; \ + \ + fb_ptr += 2048; \ + texture_offset += 0x10; \ + sub_tile_height--; \ + } \ + texture_offset += 0xF00; \ + psx_gpu->num_blocks = num_blocks; \ +} \ + + +#define setup_sprite_tile_full_8bpp_4x(edge) \ +{ \ + setup_sprite_tile_add_blocks(sub_tile_height * 2 * 4); \ + vec_16x8u texels_wide; \ + u32 left_mask_bits_a = left_mask_bits & 0xFF; \ + u32 left_mask_bits_b = left_mask_bits >> 8; \ + u32 right_mask_bits_a = right_mask_bits & 0xFF; \ + u32 right_mask_bits_b = right_mask_bits >> 8; \ + \ + while(sub_tile_height) \ + { \ + setup_sprite_tile_fetch_texel_block_8bpp(0); \ + zip_8x16b(vector_cast(vec_8x16u, texels_wide), texels, texels); \ + block->r = texels_wide.low; \ + block->draw_mask_bits = left_mask_bits_a; \ + block->fb_ptr = fb_ptr; \ + block++; \ + \ + block->r = texels_wide.low; \ + block->draw_mask_bits = left_mask_bits_a; \ + block->fb_ptr = fb_ptr + 1024; \ + block++; \ + \ + block->r = texels_wide.high; \ + block->draw_mask_bits = left_mask_bits_b; \ + block->fb_ptr = fb_ptr + 8; \ + block++; \ + \ + block->r = texels_wide.high; \ + block->draw_mask_bits = left_mask_bits_b; \ + block->fb_ptr = fb_ptr + 1024 + 8; \ + block++; \ + \ + setup_sprite_tile_fetch_texel_block_8bpp(8); \ + zip_8x16b(vector_cast(vec_8x16u, texels_wide), texels, texels); \ + block->r = texels_wide.low; \ + block->draw_mask_bits = right_mask_bits_a; \ + block->fb_ptr = fb_ptr + 16; \ + block++; \ + \ + block->r = texels_wide.low; \ + block->draw_mask_bits = right_mask_bits_a; \ + block->fb_ptr = fb_ptr + 1024 + 16; \ + block++; \ + \ + block->r = texels_wide.high; \ + block->draw_mask_bits = right_mask_bits_b; \ + block->fb_ptr = fb_ptr + 24; \ + block++; \ + \ + block->r = texels_wide.high; \ + block->draw_mask_bits = right_mask_bits_b; \ + block->fb_ptr = fb_ptr + 24 + 1024; \ + block++; \ + \ + fb_ptr += 2048; \ + texture_offset += 0x10; \ + sub_tile_height--; \ + } \ + texture_offset += 0xF00; \ + psx_gpu->num_blocks = num_blocks; \ } \ +#define setup_sprite_tile_half_8bpp_4x(edge) \ +{ \ + setup_sprite_tile_add_blocks(sub_tile_height * 4); \ + vec_16x8u texels_wide; \ + u32 edge##_mask_bits_a = edge##_mask_bits & 0xFF; \ + u32 edge##_mask_bits_b = edge##_mask_bits >> 8; \ + \ + while(sub_tile_height) \ + { \ + setup_sprite_tile_fetch_texel_block_8bpp(0); \ + zip_8x16b(vector_cast(vec_8x16u, texels_wide), texels, texels); \ + block->r = texels_wide.low; \ + block->draw_mask_bits = edge##_mask_bits_a; \ + block->fb_ptr = fb_ptr; \ + block++; \ + \ + block->r = texels_wide.low; \ + block->draw_mask_bits = edge##_mask_bits_a; \ + block->fb_ptr = fb_ptr + 1024; \ + block++; \ + \ + block->r = texels_wide.high; \ + block->draw_mask_bits = edge##_mask_bits_b; \ + block->fb_ptr = fb_ptr + 8; \ + block++; \ + \ + block->r = texels_wide.high; \ + block->draw_mask_bits = edge##_mask_bits_b; \ + block->fb_ptr = fb_ptr + 8 + 1024; \ + block++; \ + \ + fb_ptr += 2048; \ + texture_offset += 0x10; \ + sub_tile_height--; \ + } \ + texture_offset += 0xF00; \ + psx_gpu->num_blocks = num_blocks; \ +} \ + + +#define setup_sprite_tile_column_edge_pre_adjust_half_right_4x() \ + texture_offset = texture_offset_base + 8; \ + fb_ptr += 16 \ + +#define setup_sprite_tile_column_edge_pre_adjust_half_left_4x() \ + texture_offset = texture_offset_base \ + +#define setup_sprite_tile_column_edge_pre_adjust_half_4x(edge) \ + setup_sprite_tile_column_edge_pre_adjust_half_##edge##_4x() \ + +#define setup_sprite_tile_column_edge_pre_adjust_full_4x(edge) \ + texture_offset = texture_offset_base \ + +#define setup_sprite_tile_column_edge_post_adjust_half_right_4x() \ + fb_ptr -= 16 \ + +#define setup_sprite_tile_column_edge_post_adjust_half_left_4x() \ + +#define setup_sprite_tile_column_edge_post_adjust_half_4x(edge) \ + setup_sprite_tile_column_edge_post_adjust_half_##edge##_4x() \ + +#define setup_sprite_tile_column_edge_post_adjust_full_4x(edge) \ + + +#define setup_sprite_offset_u_adjust() \ + +#define setup_sprite_comapre_left_block_mask() \ + ((left_block_mask & 0xFF) == 0xFF) \ + +#define setup_sprite_comapre_right_block_mask() \ + (((right_block_mask >> 8) & 0xFF) == 0xFF) \ -#define setup_sprite_tiled_builder(texture_mode) \ -void setup_sprite_##texture_mode(psx_gpu_struct *psx_gpu, s32 x, s32 y, \ + +#define setup_sprite_offset_u_adjust_4x() \ + offset_u *= 2; \ + offset_u_right = offset_u_right * 2 + 1 \ + +#define setup_sprite_comapre_left_block_mask_4x() \ + ((left_block_mask & 0xFFFF) == 0xFFFF) \ + +#define setup_sprite_comapre_right_block_mask_4x() \ + (((right_block_mask >> 16) & 0xFFFF) == 0xFFFF) \ + + +#define setup_sprite_tiled_builder(texture_mode, x4mode) \ +void setup_sprite_##texture_mode##x4mode(psx_gpu_struct *psx_gpu, s32 x, s32 y,\ s32 u, s32 v, s32 width, s32 height, u32 color) \ { \ s32 offset_u = u & 0xF; \ @@ -3469,8 +3698,10 @@ void setup_sprite_##texture_mode(psx_gpu_struct *psx_gpu, s32 x, s32 y, \ s32 tile_width = width_rounded / 16; \ u32 offset_u_right = width_rounded & 0xF; \ \ - u32 left_block_mask = ~(0xFFFF << offset_u); \ - u32 right_block_mask = 0xFFFE << offset_u_right; \ + setup_sprite_offset_u_adjust##x4mode(); \ + \ + u32 left_block_mask = ~(0xFFFFFFFF << offset_u); \ + u32 right_block_mask = 0xFFFFFFFE << offset_u_right; \ \ u32 left_mask_bits; \ u32 right_mask_bits; \ @@ -3487,19 +3718,19 @@ void setup_sprite_##texture_mode(psx_gpu_struct *psx_gpu, s32 x, s32 y, \ u32 texture_offset_base = texture_offset; \ u32 control_mask; \ \ - u16 *fb_ptr = psx_gpu->vram_ptr + (y * 1024) + (x - offset_u); \ + u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + (x - offset_u); \ u32 num_blocks = psx_gpu->num_blocks; \ block_struct *block = psx_gpu->blocks + num_blocks; \ \ - u16 *texture_block_ptr; \ + u8 *texture_block_ptr; \ vec_8x8u texels; \ \ - setup_sprite_tiled_initialize_##texture_mode(); \ + setup_sprite_tiled_initialize_##texture_mode##x4mode(); \ \ control_mask = tile_width == 1; \ control_mask |= (tile_height == 1) << 1; \ - control_mask |= ((left_block_mask & 0xFF) == 0xFF) << 2; \ - control_mask |= (((right_block_mask >> 8) & 0xFF) == 0xFF) << 3; \ + control_mask |= setup_sprite_comapre_left_block_mask##x4mode() << 2; \ + control_mask |= setup_sprite_comapre_right_block_mask##x4mode() << 3; \ \ sprites_##texture_mode++; \ \ @@ -3507,74 +3738,86 @@ void setup_sprite_##texture_mode(psx_gpu_struct *psx_gpu, s32 x, s32 y, \ { \ default: \ case 0x0: \ - setup_sprite_tile_column_width_multi(texture_mode, multi, full, full); \ + setup_sprite_tile_column_width_multi(texture_mode, multi, full, full, \ + x4mode); \ break; \ \ case 0x1: \ - setup_sprite_tile_column_width_single(texture_mode, multi, full, none); \ + setup_sprite_tile_column_width_single(texture_mode, multi, full, none, \ + x4mode); \ break; \ \ case 0x2: \ - setup_sprite_tile_column_width_multi(texture_mode, single, full, full); \ + setup_sprite_tile_column_width_multi(texture_mode, single, full, full, \ + x4mode); \ break; \ \ case 0x3: \ - setup_sprite_tile_column_width_single(texture_mode, single, full, none); \ + setup_sprite_tile_column_width_single(texture_mode, single, full, none, \ + x4mode); \ break; \ \ case 0x4: \ - setup_sprite_tile_column_width_multi(texture_mode, multi, half, full); \ + setup_sprite_tile_column_width_multi(texture_mode, multi, half, full, \ + x4mode); \ break; \ \ case 0x5: \ - setup_sprite_tile_column_width_single(texture_mode, multi, half, right); \ + setup_sprite_tile_column_width_single(texture_mode, multi, half, right, \ + x4mode); \ break; \ \ case 0x6: \ - setup_sprite_tile_column_width_multi(texture_mode, single, half, full); \ + setup_sprite_tile_column_width_multi(texture_mode, single, half, full, \ + x4mode); \ break; \ \ case 0x7: \ - setup_sprite_tile_column_width_single(texture_mode, single, half, right);\ + setup_sprite_tile_column_width_single(texture_mode, single, half, right, \ + x4mode); \ break; \ \ case 0x8: \ - setup_sprite_tile_column_width_multi(texture_mode, multi, full, half); \ + setup_sprite_tile_column_width_multi(texture_mode, multi, full, half, \ + x4mode); \ break; \ \ case 0x9: \ - setup_sprite_tile_column_width_single(texture_mode, multi, half, left); \ + setup_sprite_tile_column_width_single(texture_mode, multi, half, left, \ + x4mode); \ break; \ \ case 0xA: \ - setup_sprite_tile_column_width_multi(texture_mode, single, full, half); \ + setup_sprite_tile_column_width_multi(texture_mode, single, full, half, \ + x4mode); \ break; \ \ case 0xB: \ - setup_sprite_tile_column_width_single(texture_mode, single, half, left); \ + setup_sprite_tile_column_width_single(texture_mode, single, half, left, \ + x4mode); \ break; \ \ case 0xC: \ - setup_sprite_tile_column_width_multi(texture_mode, multi, half, half); \ + setup_sprite_tile_column_width_multi(texture_mode, multi, half, half, \ + x4mode); \ break; \ \ case 0xE: \ - setup_sprite_tile_column_width_multi(texture_mode, single, half, half); \ + setup_sprite_tile_column_width_multi(texture_mode, single, half, half, \ + x4mode); \ break; \ } \ } \ +#ifndef NEON_BUILD +setup_sprite_tiled_builder(4bpp,); +setup_sprite_tiled_builder(8bpp,); -void setup_sprite_4bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, - s32 width, s32 height, u32 color); -void setup_sprite_8bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, - s32 width, s32 height, u32 color); -void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, - s32 width, s32 height, u32 color); +setup_sprite_tiled_builder(4bpp,_4x); +setup_sprite_tiled_builder(8bpp,_4x); +#endif -#ifndef NEON_BUILD -setup_sprite_tiled_builder(4bpp); -setup_sprite_tiled_builder(8bpp); +#if !defined(NEON_BUILD) || defined(SIMD_BUILD) void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, s32 width, s32 height, u32 color) @@ -3582,7 +3825,7 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, u32 left_offset = u & 0x7; u32 width_rounded = width + left_offset + 7; - u16 *fb_ptr = psx_gpu->vram_ptr + (y * 1024) + (s32)(x - left_offset); + u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + (s32)(x - left_offset); u32 right_width = width_rounded & 0x7; u32 block_width = width_rounded / 8; u32 fb_ptr_pitch = (1024 + 8) - (block_width * 8); @@ -3603,7 +3846,7 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, texture_offset_base &= ~0x7; - sprites_16bpp++; + stats_add(sprites_16bpp, 1); if(block_width == 1) { @@ -3624,7 +3867,7 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, texture_block_ptr = texture_page_ptr + (texture_offset_base & texture_mask); - load_128b(block->texels, texture_block_ptr); + block->texels = *(vec_8x16u *)texture_block_ptr; block->draw_mask_bits = mask_bits; block->fb_ptr = fb_ptr; @@ -3658,7 +3901,7 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, texture_offset_base += 1024; texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask); - load_128b(block->texels, texture_block_ptr); + block->texels = *(vec_8x16u *)texture_block_ptr; block->draw_mask_bits = left_mask_bits; block->fb_ptr = fb_ptr; @@ -3670,7 +3913,7 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, while(blocks_remaining) { texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask); - load_128b(block->texels, texture_block_ptr); + block->texels = *(vec_8x16u *)texture_block_ptr; block->draw_mask_bits = 0; block->fb_ptr = fb_ptr; @@ -3683,7 +3926,7 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, } texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask); - load_128b(block->texels, texture_block_ptr); + block->texels = *(vec_8x16u *)texture_block_ptr; block->draw_mask_bits = right_mask_bits; block->fb_ptr = fb_ptr; @@ -3699,12 +3942,14 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, #endif -void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, +#ifndef NEON_BUILD + +void setup_sprite_untextured_512(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, s32 width, s32 height, u32 color) { u32 right_width = ((width - 1) & 0x7) + 1; u32 right_mask_bits = (0xFF << right_width); - u16 *fb_ptr = psx_gpu->vram_ptr + (y * 1024) + x; + u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + x; u32 block_width = (width + 7) / 8; u32 fb_ptr_pitch = 1024 - ((block_width - 1) * 8); u32 blocks_remaining; @@ -3767,6 +4012,90 @@ void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, } } +#endif + +static void __attribute__((noinline)) +setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, + s32 v, s32 width, s32 height, u32 color) +{ + u32 r = color & 0xFF; + u32 g = (color >> 8) & 0xFF; + u32 b = (color >> 16) & 0xFF; + u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10) | + psx_gpu->mask_msb; + u32 color_32bpp = color_16bpp | (color_16bpp << 16); + + u16 *vram_ptr16 = psx_gpu->vram_out_ptr + x + (y * 1024); + u32 *vram_ptr; + + u32 num_width; + + if(psx_gpu->num_blocks) + { + flush_render_block_buffer(psx_gpu); + } + + while(height) + { + num_width = width; + + vram_ptr = (void *)vram_ptr16; + if((uintptr_t)vram_ptr16 & 2) + { + *vram_ptr16 = color_32bpp; + vram_ptr = (void *)(vram_ptr16 + 1); + num_width--; + } + + while(num_width >= 4 * 2) + { + vram_ptr[0] = color_32bpp; + vram_ptr[1] = color_32bpp; + vram_ptr[2] = color_32bpp; + vram_ptr[3] = color_32bpp; + + vram_ptr += 4; + num_width -= 4 * 2; + } + + while(num_width >= 2) + { + *vram_ptr++ = color_32bpp; + num_width -= 2; + } + + if(num_width > 0) + { + *(u16 *)vram_ptr = color_32bpp; + } + + vram_ptr16 += 1024; + height--; + } +} + +void setup_sprite_untextured_512(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, + s32 v, s32 width, s32 height, u32 color); + +void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, + s32 v, s32 width, s32 height, u32 color) +{ + if((psx_gpu->render_state & (RENDER_STATE_MASK_EVALUATE | + RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND)) == 0 && + (psx_gpu->render_mode & RENDER_INTERLACE_ENABLED) == 0) + { + setup_sprite_untextured_simple(psx_gpu, x, y, u, v, width, height, color); + return; + } + + while (width > 0) + { + s32 w1 = width > 512 ? 512 : width; + setup_sprite_untextured_512(psx_gpu, x, y, 0, 0, w1, height, color); + x += 512; + width -= 512; + } +} #define setup_sprite_blocks_switch_textured(texture_mode) \ @@ -3855,7 +4184,7 @@ void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, render_sprite_blocks_switch_block_texture_mode(4bpp), \ render_sprite_blocks_switch_block_texture_mode(8bpp), \ render_sprite_blocks_switch_block_texture_mode(16bpp), \ - render_sprite_blocks_switch_block_texture_mode(4bpp) \ + render_sprite_blocks_switch_block_texture_mode(16bpp) \ render_block_handler_struct render_sprite_block_handlers[] = @@ -3865,10 +4194,10 @@ render_block_handler_struct render_sprite_block_handlers[] = void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, - s32 width, s32 height, u32 flags, u32 color) + s32 *width, s32 *height, u32 flags, u32 color) { - s32 x_right = x + width - 1; - s32 y_bottom = y + height - 1; + s32 x_right = x + *width - 1; + s32 y_bottom = y + *height - 1; #ifdef PROFILE sprites++; @@ -3877,6 +4206,7 @@ void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, if(invalidate_texture_cache_region_viewport(psx_gpu, x, y, x_right, y_bottom) == 0) { + *width = *height = 0; return; } @@ -3885,7 +4215,7 @@ void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, u32 clip = psx_gpu->viewport_start_x - x; x += clip; u += clip; - width -= clip; + *width -= clip; } if(y < psx_gpu->viewport_start_y) @@ -3893,21 +4223,24 @@ void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, s32 clip = psx_gpu->viewport_start_y - y; y += clip; v += clip; - height -= clip; + *height -= clip; } if(x_right > psx_gpu->viewport_end_x) - width -= x_right - psx_gpu->viewport_end_x; + *width -= x_right - psx_gpu->viewport_end_x; if(y_bottom > psx_gpu->viewport_end_y) - height -= y_bottom - psx_gpu->viewport_end_y; + *height -= y_bottom - psx_gpu->viewport_end_y; - if((width <= 0) || (height <= 0)) + if((*width <= 0) || (*height <= 0)) + { + *width = *height = 0; return; + } #ifdef PROFILE - span_pixels += width * height; - spans += height; + span_pixels += *width * *height; + spans += *height; #endif u32 render_state = flags & @@ -3944,11 +4277,11 @@ void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, psx_gpu->render_block_handler = render_block_handler; ((setup_sprite_function_type *)render_block_handler->setup_blocks) - (psx_gpu, x, y, u, v, width, height, color); + (psx_gpu, x, y, u, v, *width, *height, color); } #define draw_pixel_line_mask_evaluate_yes() \ - if(*vram_ptr & 0x8000) \ + if((*vram_ptr & 0x8000) == 0) \ #define draw_pixel_line_mask_evaluate_no() \ @@ -4187,9 +4520,6 @@ do \ { \ delta_y *= -1; \ \ - if(delta_y >= 512) \ - return; \ - \ if(delta_x > delta_y) \ { \ draw_line_span_horizontal(decrement, shading, blending, dithering, \ @@ -4203,9 +4533,6 @@ do \ } \ else \ { \ - if(delta_y >= 512) \ - return; \ - \ if(delta_x > delta_y) \ { \ draw_line_span_horizontal(increment, shading, blending, dithering, \ @@ -4220,7 +4547,7 @@ do \ void render_line(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 flags, - u32 color) + u32 color, int double_resolution) { s32 color_r, color_g, color_b; u32 triangle_winding = 0; @@ -4261,6 +4588,7 @@ void render_line(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 flags, if(vertex_a->x >= vertex_b->x) { vertex_swap(vertex_a, vertex_b); + (void)triangle_winding; } x_a = vertex_a->x; @@ -4272,9 +4600,19 @@ void render_line(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 flags, delta_x = x_b - x_a; delta_y = y_b - y_a; - if(delta_x >= 1024) + if(delta_x >= 1024 || delta_y >= 512 || delta_y <= -512) return; + if(double_resolution) + { + x_a *= 2; + x_b *= 2; + y_a *= 2; + y_b *= 2; + delta_x *= 2; + delta_y *= 2; + } + flags &= ~RENDER_FLAGS_TEXTURE_MAP; vram_ptr = psx_gpu->vram_out_ptr + (y_a * 1024) + x_a; @@ -4519,6 +4857,9 @@ void render_block_fill_enh(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y, if((width == 0) || (height == 0)) return; + if(width > 1024) + width = 1024; + u32 r = color & 0xFF; u32 g = (color >> 8) & 0xFF; u32 b = (color >> 16) & 0xFF; @@ -4526,9 +4867,9 @@ void render_block_fill_enh(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y, psx_gpu->mask_msb; u32 color_32bpp = color_16bpp | (color_16bpp << 16); - u32 *vram_ptr = (u32 *)(psx_gpu->vram_out_ptr + x + (y * 2048)); + u32 *vram_ptr = (u32 *)(psx_gpu->vram_out_ptr + x + (y * 1024)); - u32 pitch = 2048 / 2 - (width / 2); + u32 pitch = 1024 / 2 - (width / 2); u32 num_width; while(height) @@ -4554,6 +4895,7 @@ void render_block_fill_enh(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y, } } +#ifndef PCSX void render_block_copy(psx_gpu_struct *psx_gpu, u16 *source, u32 x, u32 y, u32 width, u32 height, u32 pitch) { @@ -4585,7 +4927,7 @@ void render_block_move(psx_gpu_struct *psx_gpu, u32 source_x, u32 source_y, render_block_copy(psx_gpu, psx_gpu->vram_ptr + source_x + (source_y * 1024), dest_x, dest_y, width, height, 1024); } - +#endif void initialize_reciprocal_table(void) { @@ -4599,10 +4941,10 @@ void initialize_reciprocal_table(void) { shift = __builtin_clz(height); height_normalized = height << shift; - height_reciprocal = ((1ULL << 52) + (height_normalized - 1)) / + height_reciprocal = ((1ULL << 51) + (height_normalized - 1)) / height_normalized; - shift = 32 - (52 - shift); + shift = 32 - (51 - shift); reciprocal_table[height] = (height_reciprocal << 10) | shift; } @@ -4641,6 +4983,8 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram) psx_gpu->texture_page_ptr = psx_gpu->vram_ptr; psx_gpu->clut_ptr = psx_gpu->vram_ptr; + psx_gpu->viewport_start_x = psx_gpu->viewport_start_y = 0; + psx_gpu->viewport_end_x = psx_gpu->viewport_end_y = 0; psx_gpu->mask_msb = 0; psx_gpu->texture_window_x = 0; @@ -4653,6 +4997,7 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram) memset(psx_gpu->vram_ptr, 0, sizeof(u16) * 1024 * 512); initialize_reciprocal_table(); + psx_gpu->reciprocal_table_ptr = reciprocal_table; // 00 01 10 11 // 00 0 4 1 5 @@ -4671,6 +5016,11 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram) psx_gpu->dither_table[3] = dither_table_row(3, -1, 2, -2); psx_gpu->primitive_type = PRIMITIVE_TYPE_UNKNOWN; + + psx_gpu->saved_hres = 256; + + // check some offset + psx_gpu->reserved_a[(offsetof(psx_gpu_struct, blocks) == psx_gpu_blocks_offset) - 1] = 0; } u64 get_us(void) @@ -4681,7 +5031,7 @@ u64 get_us(void) return (tv.tv_sec * 1000000ULL) + tv.tv_usec; } -#ifdef NEON_BUILD +#if 0 //def NEON_BUILD u32 get_counter() { @@ -4735,3 +5085,6 @@ void triangle_benchmark(psx_gpu_struct *psx_gpu) #endif +#include "psx_gpu_4x.c" + +// vim:ts=2:sw=2:expandtab