X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=plugins%2Fgpu_neon%2Fpsx_gpu%2Fpsx_gpu.c;h=a0bff3e99f607a366cafe6e29623d3b91eb8cf92;hb=5fe1a2b17ff2f336a63e36ac791c99096775d5cf;hp=a79254da9f17bd496c993364d1157e3892e1abd6;hpb=37725e8cc9157e2e7819538ee5c98279b8dbefff;p=pcsx_rearmed.git diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index a79254da..a0bff3e9 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -14,7 +14,9 @@ #include #include +#include #include +#include #include "common.h" #ifndef NEON_BUILD @@ -22,6 +24,13 @@ #endif #include "psx_gpu_simd.h" +#if 0 +void dump_r_d(const char *name, void *dump); +void dump_r_q(const char *name, void *dump); +#define dumprd(n) dump_r_d(#n, n.e) +#define dumprq(n) dump_r_q(#n, n.e) +#endif + u32 span_pixels = 0; u32 span_pixel_blocks = 0; u32 spans = 0; @@ -51,6 +60,8 @@ u32 zero_block_spans = 0; u32 texture_cache_loads = 0; u32 false_modulated_blocks = 0; +#define stats_add(stat, count) // stat += count + /* double size for enhancement */ u32 reciprocal_table[512 * 2]; @@ -249,8 +260,8 @@ u32 invalidate_texture_cache_region_viewport(psx_gpu_struct *psx_gpu, u32 x1, return mask; } -void update_texture_cache_region(psx_gpu_struct *psx_gpu, u32 x1, u32 y1, - u32 x2, u32 y2) +static void update_texture_cache_region_(psx_gpu_struct *psx_gpu, + u32 x1, u32 y1, u32 x2, u32 y2) { u32 mask = texture_region_mask(x1, y1, x2, y2); u32 texture_page; @@ -302,6 +313,22 @@ void update_texture_cache_region(psx_gpu_struct *psx_gpu, u32 x1, u32 y1, } } +void update_texture_cache_region(psx_gpu_struct *psx_gpu, u32 x1, u32 y1, + u32 x2, u32 y2) +{ + s32 w = x2 - x1; + do + { + x2 = x1 + w; + if (x2 > 1023) + x2 = 1023; + update_texture_cache_region_(psx_gpu, x1, y1, x2, y2); + w -= x2 - x1; + x1 = 0; + } + while (unlikely(w > 0)); +} + #ifndef NEON_BUILD void update_texture_4bpp_cache(psx_gpu_struct *psx_gpu) @@ -762,31 +789,30 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, printf("mismatch on %s %s: %x vs %x\n", #_a, #_b, _a, _b) \ -#ifndef NDEBUG -#define setup_spans_debug_check(span_edge_data_element) \ -{ \ - u32 _num_spans = &span_edge_data_element - psx_gpu->span_edge_data; \ - if (_num_spans > MAX_SPANS) \ - *(int *)0 = 1; \ - if (_num_spans < psx_gpu->num_spans) \ - { \ - if(span_edge_data_element.num_blocks > MAX_BLOCKS_PER_ROW) \ - *(int *)0 = 1; \ - if(span_edge_data_element.y > 2048) \ - *(int *)0 = 1; \ - } \ -} \ - +#if !defined(NEON_BUILD) && !defined(NDEBUG) +static void setup_spans_debug_check(psx_gpu_struct *psx_gpu, + edge_data_struct *span_edge_data_element) +{ + u32 _num_spans = span_edge_data_element - psx_gpu->span_edge_data; + if (_num_spans > MAX_SPANS) + *(volatile int *)0 = 1; + if (_num_spans < psx_gpu->num_spans) + { + if(span_edge_data_element->num_blocks > MAX_BLOCKS_PER_ROW) + *(volatile int *)0 = 2; + if(span_edge_data_element->y >= 2048) + *(volatile int *)0 = 3; + } +} #else -#define setup_spans_debug_check(span_edge_data_element) \ - +#define setup_spans_debug_check(psx_gpu, span_edge_data_element) #endif #define setup_spans_prologue_alternate_yes() \ vec_2x64s alternate_x; \ vec_2x64s alternate_dx_dy; \ vec_4x32s alternate_x_32; \ - vec_2x32s alternate_x_16; \ + vec_4x16u alternate_x_16; \ \ vec_4x16u alternate_select; \ vec_4x16s y_mid_point; \ @@ -846,6 +872,7 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, span_b_offset = psx_gpu->span_b_offset; \ \ vec_8x16u c_0x0001; \ + vec_4x16u c_max_blocks_per_row; \ \ dup_8x16b(c_0x0001, 0x0001); \ dup_8x16b(left_edge, psx_gpu->viewport_start_x); \ @@ -854,6 +881,7 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, dup_4x16b(c_0x04, 0x04); \ dup_4x16b(c_0x07, 0x07); \ dup_4x16b(c_0xFFFE, 0xFFFE); \ + dup_4x16b(c_max_blocks_per_row, MAX_BLOCKS_PER_ROW); \ #define compute_edge_delta_x2() \ @@ -1077,6 +1105,7 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, and_4x16b(span_shift, left_right_x_16.high, c_0x07); \ shl_variable_4x16b(span_shift, c_0xFFFE, span_shift); \ shr_4x16b(left_right_x_16.high, left_right_x_16.high, 3); \ + min_4x16b(left_right_x_16.high, left_right_x_16.high, c_max_blocks_per_row); \ \ u32 i; \ for(i = 0; i < 4; i++) \ @@ -1085,7 +1114,7 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, span_edge_data[i].num_blocks = left_right_x_16.high.e[i]; \ span_edge_data[i].right_mask = span_shift.e[i]; \ span_edge_data[i].y = y_x4.e[i]; \ - setup_spans_debug_check(span_edge_data[i]); \ + setup_spans_debug_check(psx_gpu, &span_edge_data[i]); \ } \ \ span_edge_data += 4; \ @@ -1115,7 +1144,9 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, \ setup_spans_prologue_b(); \ \ - if(height > 0) \ + if (height > 512) \ + height = 512; \ + if (height > 0) \ { \ y_x4.e[0] = y_a; \ y_x4.e[1] = y_a + 1; \ @@ -1163,7 +1194,9 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, \ setup_spans_prologue_b(); \ \ - if(height > 0) \ + if (height > 512) \ + height = 512; \ + if (height > 0) \ { \ y_x4.e[0] = y_a; \ y_x4.e[1] = y_a - 1; \ @@ -1353,7 +1386,9 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, setup_spans_prologue_b(); - if(height_minor_a > 0) + if (height_minor_a > 512) + height_minor_a = 512; + if (height_minor_a > 0) { y_x4.e[0] = y_a; y_x4.e[1] = y_a - 1; @@ -1395,7 +1430,9 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, setup_spans_clip(increment, no); } - if(height_minor_b > 0) + if (height_minor_b > 512) + height_minor_b = 512; + if (height_minor_b > 0) { y_x4.e[0] = y_a; y_x4.e[1] = y_a + 1; @@ -1842,7 +1879,7 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, } \ #define setup_blocks_add_blocks_direct() \ - texel_blocks_untextured += span_num_blocks; \ + stats_add(texel_blocks_untextured, span_num_blocks); \ span_pixel_blocks += span_num_blocks \ @@ -1938,14 +1975,14 @@ setup_blocks_builder(unshaded, untextured, undithered, unswizzled, direct); void texture_blocks_untextured(psx_gpu_struct *psx_gpu) { if(psx_gpu->primitive_type != PRIMITIVE_TYPE_SPRITE) - texel_blocks_untextured += psx_gpu->num_blocks; + stats_add(texel_blocks_untextured, psx_gpu->num_blocks); } void texture_blocks_4bpp(psx_gpu_struct *psx_gpu) { block_struct *block = psx_gpu->blocks; u32 num_blocks = psx_gpu->num_blocks; - texel_blocks_4bpp += num_blocks; + stats_add(texel_blocks_4bpp, num_blocks); vec_8x8u texels_low; vec_8x8u texels_high; @@ -1997,7 +2034,7 @@ void texture_blocks_8bpp(psx_gpu_struct *psx_gpu) block_struct *block = psx_gpu->blocks; u32 num_blocks = psx_gpu->num_blocks; - texel_blocks_8bpp += num_blocks; + stats_add(texel_blocks_8bpp, num_blocks); if(psx_gpu->current_texture_mask & psx_gpu->dirty_textures_8bpp_mask) update_texture_8bpp_cache(psx_gpu); @@ -2031,7 +2068,7 @@ void texture_blocks_16bpp(psx_gpu_struct *psx_gpu) block_struct *block = psx_gpu->blocks; u32 num_blocks = psx_gpu->num_blocks; - texel_blocks_16bpp += num_blocks; + stats_add(texel_blocks_16bpp, num_blocks); vec_8x16u texels; @@ -3035,6 +3072,7 @@ static void render_triangle_p(psx_gpu_struct *psx_gpu, } } } + assert(psx_gpu->span_edge_data[0].y < 1024u); u32 render_state = flags & (RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND | @@ -3067,7 +3105,7 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, render_triangle_p(psx_gpu, vertex_ptrs, flags); } -#ifndef NEON_BUILD +#if !defined(NEON_BUILD) || defined(SIMD_BUILD) void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu) { @@ -3120,7 +3158,7 @@ void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu) #define setup_sprite_tile_fetch_texel_block_8bpp(offset) \ - texture_block_ptr = psx_gpu->texture_page_ptr + \ + texture_block_ptr = (u8 *)psx_gpu->texture_page_ptr + \ ((texture_offset + offset) & texture_mask); \ \ load_64b(texels, texture_block_ptr) \ @@ -3228,7 +3266,7 @@ void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu) #define setup_sprite_tile_half_8bpp(edge) \ { \ - setup_sprite_tile_add_blocks(sub_tile_height * 2); \ + setup_sprite_tile_add_blocks(sub_tile_height); \ \ while(sub_tile_height) \ { \ @@ -3682,7 +3720,7 @@ void setup_sprite_##texture_mode##x4mode(psx_gpu_struct *psx_gpu, s32 x, s32 y,\ u32 num_blocks = psx_gpu->num_blocks; \ block_struct *block = psx_gpu->blocks + num_blocks; \ \ - u16 *texture_block_ptr; \ + u8 *texture_block_ptr; \ vec_8x8u texels; \ \ setup_sprite_tiled_initialize_##texture_mode##x4mode(); \ @@ -3775,6 +3813,9 @@ setup_sprite_tiled_builder(8bpp,); setup_sprite_tiled_builder(4bpp,_4x); setup_sprite_tiled_builder(8bpp,_4x); +#endif + +#if !defined(NEON_BUILD) || defined(SIMD_BUILD) void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, s32 width, s32 height, u32 color) @@ -3803,7 +3844,7 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, texture_offset_base &= ~0x7; - sprites_16bpp++; + stats_add(sprites_16bpp, 1); if(block_width == 1) { @@ -3824,7 +3865,7 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, texture_block_ptr = texture_page_ptr + (texture_offset_base & texture_mask); - load_128b(block->texels, texture_block_ptr); + block->texels = *(vec_8x16u *)texture_block_ptr; block->draw_mask_bits = mask_bits; block->fb_ptr = fb_ptr; @@ -3858,7 +3899,7 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, texture_offset_base += 1024; texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask); - load_128b(block->texels, texture_block_ptr); + block->texels = *(vec_8x16u *)texture_block_ptr; block->draw_mask_bits = left_mask_bits; block->fb_ptr = fb_ptr; @@ -3870,7 +3911,7 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, while(blocks_remaining) { texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask); - load_128b(block->texels, texture_block_ptr); + block->texels = *(vec_8x16u *)texture_block_ptr; block->draw_mask_bits = 0; block->fb_ptr = fb_ptr; @@ -3883,7 +3924,7 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, } texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask); - load_128b(block->texels, texture_block_ptr); + block->texels = *(vec_8x16u *)texture_block_ptr; block->draw_mask_bits = right_mask_bits; block->fb_ptr = fb_ptr; @@ -3897,17 +3938,13 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, } } -void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, +#endif + +#ifndef NEON_BUILD + +void setup_sprite_untextured_512(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, s32 width, s32 height, u32 color) { - if((psx_gpu->render_state & (RENDER_STATE_MASK_EVALUATE | - RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND)) == 0 && - (psx_gpu->render_mode & RENDER_INTERLACE_ENABLED) == 0) - { - setup_sprite_untextured_simple(psx_gpu, x, y, u, v, width, height, color); - return; - } - u32 right_width = ((width - 1) & 0x7) + 1; u32 right_mask_bits = (0xFF << right_width); u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + x; @@ -3975,8 +4012,9 @@ void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, #endif -void setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y, - s32 u, s32 v, s32 width, s32 height, u32 color) +static void __attribute__((noinline)) +setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, + s32 v, s32 width, s32 height, u32 color) { u32 r = color & 0xFF; u32 g = (color >> 8) & 0xFF; @@ -3990,7 +4028,7 @@ void setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 num_width; - if(psx_gpu->num_blocks > MAX_BLOCKS) + if(psx_gpu->num_blocks) { flush_render_block_buffer(psx_gpu); } @@ -4000,7 +4038,7 @@ void setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y, num_width = width; vram_ptr = (void *)vram_ptr16; - if((long)vram_ptr16 & 2) + if((uintptr_t)vram_ptr16 & 2) { *vram_ptr16 = color_32bpp; vram_ptr = (void *)(vram_ptr16 + 1); @@ -4034,6 +4072,29 @@ void setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y, } } +void setup_sprite_untextured_512(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, + s32 v, s32 width, s32 height, u32 color); + +void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, + s32 v, s32 width, s32 height, u32 color) +{ + if((psx_gpu->render_state & (RENDER_STATE_MASK_EVALUATE | + RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND)) == 0 && + (psx_gpu->render_mode & RENDER_INTERLACE_ENABLED) == 0) + { + setup_sprite_untextured_simple(psx_gpu, x, y, u, v, width, height, color); + return; + } + + while (width > 0) + { + s32 w1 = width > 512 ? 512 : width; + setup_sprite_untextured_512(psx_gpu, x, y, 0, 0, w1, height, color); + x += 512; + width -= 512; + } +} + #define setup_sprite_blocks_switch_textured(texture_mode) \ setup_sprite_##texture_mode \ @@ -4914,6 +4975,8 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram) psx_gpu->texture_page_ptr = psx_gpu->vram_ptr; psx_gpu->clut_ptr = psx_gpu->vram_ptr; + psx_gpu->viewport_start_x = psx_gpu->viewport_start_y = 0; + psx_gpu->viewport_end_x = psx_gpu->viewport_end_y = 0; psx_gpu->mask_msb = 0; psx_gpu->texture_window_x = 0; @@ -4946,7 +5009,7 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram) psx_gpu->primitive_type = PRIMITIVE_TYPE_UNKNOWN; - psx_gpu->enhancement_x_threshold = 256; + psx_gpu->saved_hres = 256; } u64 get_us(void) @@ -5012,3 +5075,5 @@ void triangle_benchmark(psx_gpu_struct *psx_gpu) #endif #include "psx_gpu_4x.c" + +// vim:ts=2:sw=2:expandtab