From 3867c6efed8d1cd6cd40f07cd46876f59da8912f Mon Sep 17 00:00:00 2001 From: Exophase Date: Fri, 23 Dec 2011 02:47:19 +0200 Subject: [PATCH] psx_gpu: texture cache fix, updates --- plugins/gpu_neon/psx_gpu/psx_gpu.c | 122 +++++++++++++++----- plugins/gpu_neon/psx_gpu/psx_gpu.h | 10 +- plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S | 113 ++++++++++++++---- plugins/gpu_neon/psx_gpu/psx_gpu_main.c | 67 +++-------- plugins/gpu_neon/psx_gpu/psx_gpu_parse.c | 46 +++----- 5 files changed, 218 insertions(+), 140 deletions(-) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index 84848f8d..2acfedc6 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -20,7 +20,6 @@ u32 span_pixels = 0; u32 span_pixel_blocks = 0; -u32 span_pixel_blocks_unaligned = 0; u32 spans = 0; u32 triangles = 0; u32 sprites = 0; @@ -39,9 +38,6 @@ u32 texel_blocks_8bpp = 0; u32 texel_blocks_16bpp = 0; u32 texel_blocks_untextured = 0; u32 blend_blocks = 0; -u32 untextured_pixels = 0; -u32 blend_pixels = 0; -u32 transparent_pixels = 0; u32 render_buffer_flushes = 0; u32 state_changes = 0; u32 left_split_triangles = 0; @@ -49,8 +45,7 @@ u32 flat_triangles = 0; u32 clipped_triangles = 0; u32 zero_block_spans = 0; u32 texture_cache_loads = 0; -u32 false_modulated_triangles = 0; -u32 false_modulated_sprites = 0; +u32 false_modulated_blocks = 0; u32 reciprocal_table[512]; @@ -241,6 +236,7 @@ u32 invalidate_texture_cache_region_viewport(psx_gpu_struct *psx_gpu, u32 x1, { u32 mask = texture_region_mask(x1, y1, x2, y2) & psx_gpu->viewport_mask; + psx_gpu->dirty_textures_4bpp_mask |= mask; psx_gpu->dirty_textures_8bpp_mask |= mask; psx_gpu->dirty_textures_8bpp_alternate_mask |= mask; @@ -257,7 +253,7 @@ void update_texture_8bpp_cache_slice(psx_gpu_struct *psx_gpu, void update_texture_4bpp_cache(psx_gpu_struct *psx_gpu) { u32 current_texture_page = psx_gpu->current_texture_page; - u8 *texture_page_ptr = psx_gpu->texture_page_ptr; + u8 *texture_page_ptr = psx_gpu->texture_page_base; u16 *vram_ptr = psx_gpu->vram_ptr; u32 texel_block; @@ -319,7 +315,7 @@ void update_texture_4bpp_cache(psx_gpu_struct *psx_gpu) void update_texture_8bpp_cache_slice(psx_gpu_struct *psx_gpu, u32 texture_page) { - u16 *texture_page_ptr = psx_gpu->texture_page_ptr; + u16 *texture_page_ptr = psx_gpu->texture_page_base; u16 *vram_ptr = psx_gpu->vram_ptr; u32 tile_x, tile_y; @@ -413,8 +409,10 @@ void flush_render_block_buffer(psx_gpu_struct *psx_gpu) render_block_handler->shade_blocks(psx_gpu); render_block_handler->blend_blocks(psx_gpu); +#ifdef PROFILE span_pixel_blocks += psx_gpu->num_blocks; render_buffer_flushes++; +#endif psx_gpu->num_blocks = 0; } @@ -1748,6 +1746,8 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, } \ #define setup_blocks_add_blocks_direct() \ + texel_blocks_untextured += span_num_blocks; \ + span_pixel_blocks += span_num_blocks \ #define setup_blocks_builder(shading, texturing, dithering, sw, target) \ @@ -1787,7 +1787,6 @@ void setup_blocks_##shading##_##texturing##_##dithering##_##sw##_##target( \ s32 pixel_span = span_num_blocks * 8; \ pixel_span -= __builtin_popcount(span_edge_data->right_mask & 0xFF); \ span_pixels += pixel_span; \ - span_pixel_blocks_unaligned += (pixel_span + 7) / 8; \ \ span_num_blocks--; \ while(span_num_blocks) \ @@ -2017,16 +2016,29 @@ void texture_blocks_16bpp(psx_gpu_struct *psx_gpu) } \ -#define shade_blocks_textured_modulated_shaded_primitive_load() \ +#define shade_blocks_textured_false_modulated_check_dithered(target) \ + +#define shade_blocks_textured_false_modulated_check_undithered(target) \ + if(psx_gpu->triangle_color == 0x808080) \ + { \ + \ + shade_blocks_textured_unmodulated_##target(psx_gpu); \ + false_modulated_blocks += num_blocks; \ + return; \ + } \ + + +#define shade_blocks_textured_modulated_shaded_primitive_load(dithering, \ + target) \ -#define shade_blocks_textured_modulated_unshaded_primitive_load() \ +#define shade_blocks_textured_modulated_unshaded_primitive_load(dithering, \ + target) \ { \ u32 color = psx_gpu->triangle_color; \ dup_8x8b(colors_r, color); \ dup_8x8b(colors_g, color >> 8); \ dup_8x8b(colors_b, color >> 16); \ - if(psx_gpu->triangle_color == 0x808080) \ - false_modulated_triangles++; \ + shade_blocks_textured_false_modulated_check_##dithering(target); \ } \ #define shade_blocks_textured_modulated_shaded_block_load() \ @@ -2091,7 +2103,8 @@ void shade_blocks_##shading##_textured_modulated_##dithering##_##target( \ \ dup_8x16b(d128_0x8000, 0x8000); \ \ - shade_blocks_textured_modulated_##shading##_primitive_load(); \ + shade_blocks_textured_modulated_##shading##_primitive_load(dithering, \ + target); \ \ while(num_blocks) \ { \ @@ -2157,6 +2170,9 @@ void shade_blocks_unshaded_textured_modulated_dithered_indirect(psx_gpu_struct void shade_blocks_unshaded_textured_modulated_undithered_indirect(psx_gpu_struct *psx_gpu); +void shade_blocks_textured_unmodulated_indirect(psx_gpu_struct *psx_gpu); +void shade_blocks_textured_unmodulated_direct(psx_gpu_struct *psx_gpu); + #ifndef NEON_BUILD shade_blocks_textured_modulated_builder(shaded, dithered, direct); @@ -2204,8 +2220,38 @@ void shade_blocks_textured_unmodulated_##target(psx_gpu_struct *psx_gpu) \ } \ } \ -void shade_blocks_textured_unmodulated_indirect(psx_gpu_struct *psx_gpu); -void shade_blocks_textured_unmodulated_direct(psx_gpu_struct *psx_gpu); +#define shade_blocks_textured_unmodulated_dithered_builder(target) \ +void shade_blocks_textured_unmodulated_dithered_##target(psx_gpu_struct \ + *psx_gpu) \ +{ \ + block_struct *block = psx_gpu->blocks; \ + u32 num_blocks = psx_gpu->num_blocks; \ + vec_8x16u draw_mask; \ + vec_8x16u test_mask = psx_gpu->test_mask; \ + u32 draw_mask_bits; \ + \ + vec_8x16u pixels; \ + shade_blocks_load_msb_mask_##target(); \ + \ + while(num_blocks) \ + { \ + vec_8x16u zero_mask; \ + \ + draw_mask_bits = block->draw_mask_bits; \ + dup_8x16b(draw_mask, draw_mask_bits); \ + tst_8x16b(draw_mask, draw_mask, test_mask); \ + \ + pixels = block->texels; \ + \ + cmpeqz_8x16b(zero_mask, pixels); \ + or_8x16b(zero_mask, draw_mask, zero_mask); \ + \ + shade_blocks_store_##target(zero_mask, pixels); \ + \ + num_blocks--; \ + block++; \ + } \ +} \ #ifndef NEON_BUILD @@ -2773,11 +2819,15 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, triangle_area = triangle_signed_area_x2(a->x, a->y, b->x, b->y, c->x, c->y); +#ifdef PROFILE triangles++; +#endif if(triangle_area == 0) { +#ifdef PROFILE trivial_rejects++; +#endif return; } @@ -2797,7 +2847,9 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, if((y_bottom - y_top) >= 512) { +#ifdef PROFILE trivial_rejects++; +#endif return; } @@ -2821,14 +2873,18 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, if((c->x - a->x) >= 1024) { +#ifdef PROFILE trivial_rejects++; +#endif return; } if(invalidate_texture_cache_region_viewport(psx_gpu, a->x, y_top, c->x, y_bottom) == 0) { +#ifdef PROFILE trivial_rejects++; +#endif return; } @@ -2922,7 +2978,9 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, break; } +#ifdef PROFILE spans += psx_gpu->num_spans; +#endif u32 render_state = flags & (RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND | @@ -2934,7 +2992,9 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, { psx_gpu->render_state = render_state; flush_render_block_buffer(psx_gpu); +#ifdef PROFILE state_changes++; +#endif } psx_gpu->primitive_type = PRIMITIVE_TYPE_TRIANGLE; @@ -3369,7 +3429,7 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, u32 left_offset = u & 0x7; u32 width_rounded = width + left_offset + 7; - u16 *fb_ptr = psx_gpu->vram_ptr + (y * 1024) + (x - left_offset); + u16 *fb_ptr = psx_gpu->vram_ptr + (y * 1024) + (s32)(x - left_offset); u32 right_width = width_rounded & 0x7; u32 block_width = width_rounded / 8; u32 fb_ptr_pitch = (1024 + 8) - (block_width * 8); @@ -3519,7 +3579,10 @@ void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, { blocks_remaining = block_width - 1; num_blocks += block_width; + +#ifdef PROFILE sprite_blocks += block_width; +#endif if(num_blocks > MAX_BLOCKS) { @@ -3654,6 +3717,10 @@ void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, s32 x_right = x + width - 1; s32 y_bottom = y + height - 1; +#ifdef PROFILE + sprites++; +#endif + if(invalidate_texture_cache_region_viewport(psx_gpu, x, y, x_right, y_bottom) == 0) { @@ -3685,10 +3752,10 @@ void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, if((width <= 0) || (height <= 0)) return; - sprites++; - +#ifdef PROFILE span_pixels += width * height; spans += height; +#endif u32 render_state = flags & (RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND | @@ -3701,7 +3768,9 @@ void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, { psx_gpu->render_state = render_state; flush_render_block_buffer(psx_gpu); +#ifdef PROFILE state_changes++; +#endif } psx_gpu->primitive_type = PRIMITIVE_TYPE_SPRITE; @@ -4032,7 +4101,9 @@ void render_line(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 flags, u32 control_mask; +#ifdef PROFILE lines++; +#endif if(vertex_a->x >= vertex_b->x) { @@ -4276,8 +4347,6 @@ void render_block_copy(psx_gpu_struct *psx_gpu, u16 *source, u32 x, u32 y, invalidate_texture_cache_region(psx_gpu, x, y, x + width - 1, y + height - 1); - //printf("copy for %d, %d\n", width, height); - for(draw_y = 0; draw_y < height; draw_y++) { for(draw_x = 0; draw_x < width; draw_x++) @@ -4329,15 +4398,6 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram) psx_gpu->test_mask = test_mask; - psx_gpu->pixel_count_mode = 0; - psx_gpu->pixel_compare_mode = 0; - - psx_gpu->vram_pixel_counts_a = malloc(sizeof(u8) * 1024 * 512); - psx_gpu->vram_pixel_counts_b = malloc(sizeof(u8) * 1024 * 512); - memset(psx_gpu->vram_pixel_counts_a, 0, sizeof(u8) * 1024 * 512); - memset(psx_gpu->vram_pixel_counts_b, 0, sizeof(u8) * 1024 * 512); - psx_gpu->compare_vram = malloc(sizeof(u16) * 1024 * 512); - psx_gpu->dirty_textures_4bpp_mask = 0xFFFFFFFF; psx_gpu->dirty_textures_8bpp_mask = 0xFFFFFFFF; psx_gpu->dirty_textures_8bpp_alternate_mask = 0xFFFFFFFF; @@ -4354,6 +4414,7 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram) psx_gpu->vram_ptr = vram; + psx_gpu->texture_page_base = psx_gpu->vram_ptr; psx_gpu->texture_page_ptr = psx_gpu->vram_ptr; psx_gpu->clut_ptr = psx_gpu->vram_ptr; @@ -4374,7 +4435,6 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram) // d1: (2 3 6 7): y0 // d2: (4 5 6 7): x0 ^ y0 - psx_gpu->dither_table[0] = dither_table_row(-4, 0, -3, 1); psx_gpu->dither_table[1] = dither_table_row(2, -2, 3, -1); psx_gpu->dither_table[2] = dither_table_row(-3, 1, -4, 0); diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.h b/plugins/gpu_neon/psx_gpu/psx_gpu.h index 49425ceb..137dda97 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.h +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.h @@ -130,12 +130,11 @@ typedef struct u32 dirty_textures_8bpp_alternate_mask; u32 triangle_color; - u32 primitive_color; - u32 dither_table[4]; struct render_block_handler_struct *render_block_handler; void *texture_page_ptr; + void *texture_page_base; u16 *clut_ptr; u16 *vram_ptr; @@ -189,13 +188,6 @@ typedef struct u8 texture_4bpp_cache[32][256 * 256]; u8 texture_8bpp_even_cache[16][256 * 256]; u8 texture_8bpp_odd_cache[16][256 * 256]; - - u32 pixel_count_mode; - u32 pixel_compare_mode; - - u8 *vram_pixel_counts_a; - u8 *vram_pixel_counts_b; - u16 *compare_vram; } psx_gpu_struct; typedef struct __attribute__((aligned(16))) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S index 54605b84..fd997980 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S @@ -38,10 +38,10 @@ #define psx_gpu_dirty_textures_8bpp_mask_offset 172 #define psx_gpu_dirty_textures_8bpp_alternate_mask_offset 176 #define psx_gpu_triangle_color_offset 180 -#define psx_gpu_primitive_color_offset 184 -#define psx_gpu_dither_table_offset 188 -#define psx_gpu_render_block_handler_offset 204 -#define psx_gpu_texture_page_ptr_offset 208 +#define psx_gpu_dither_table_offset 184 +#define psx_gpu_render_block_handler_offset 200 +#define psx_gpu_texture_page_ptr_offset 204 +#define psx_gpu_texture_page_base_offset 208 #define psx_gpu_clut_ptr_offset 212 #define psx_gpu_vram_ptr_offset 216 @@ -1955,6 +1955,8 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_direct) vdup.u16 colors, color add span_edge_data, psx_gpu, #psx_gpu_span_edge_data_offset + orr color, color, lsl #16 + 0: ldrh span_num_blocks, [ span_edge_data, #edge_data_num_blocks_offset ] @@ -1981,12 +1983,21 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_direct) 3: ldrb right_mask, [ span_edge_data, #edge_data_right_mask_offset ] - eor right_mask, right_mask, #0xFF - 4: - strh color, [ fb_ptr ], #2 - movs right_mask, right_mask, lsr #1 - bne 4b + cmp right_mask, #0x0 + beq 5f + + tst right_mask, #0xF + streq color, [ fb_ptr ], #4 + moveq right_mask, right_mask, lsr #4 + streq color, [ fb_ptr ], #4 + + tst right_mask, #0x3 + streq color, [ fb_ptr ], #4 + moveq right_mask, right_mask, lsr #2 + + tst right_mask, #0x1 + streqh color, [ fb_ptr ] 1: add span_edge_data, span_edge_data, #8 @@ -1997,6 +2008,9 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_direct) ldmia sp!, { r4 - r11, pc } + 5: + vst1.u32 { colors }, [ fb_ptr ] + bal 1b #undef c_64 @@ -2337,6 +2351,7 @@ setup_blocks_shaded_untextured_indirect_builder(dithered) #define draw_mask q0 #define pixels_low d16 +#define pixels_high d17 @@ -2500,23 +2515,67 @@ function(setup_blocks_shaded_untextured_##dithering##_unswizzled_direct) \ 3: \ setup_blocks_shaded_untextured_dither_a_##dithering(); \ \ - ldrb right_mask, [ span_edge_data, #edge_data_right_mask_offset ]; \ + ldrh right_mask, [ span_edge_data, #edge_data_right_mask_offset ]; \ setup_blocks_shaded_untextured_dither_b_##dithering(); \ \ vshr.u8 r_whole_8, r_whole_8, #3; \ + rbit right_mask, right_mask; \ vmov pixels, msb_mask; \ vbic.u8 gb_whole_8, gb_whole_8, d128_0x7; \ - eor right_mask, right_mask, #0xFF; \ + clz right_mask, right_mask; \ \ vmlal.u8 pixels, r_whole_8, d64_1; \ vmlal.u8 pixels, g_whole_8, d64_4; \ vmlal.u8 pixels, b_whole_8, d64_128; \ \ + ldr pc, [ pc, right_mask, lsl #2 ]; \ + nop; \ + nop; \ + .word 4f; \ + .word 5f; \ + .word 6f; \ + .word 7f; \ + .word 8f; \ + .word 9f; \ + .word 10f; \ + .word 11f; \ + \ 4: \ - vst1.u16 { pixels_low[0] }, [ fb_ptr ]!; \ - vext.16 pixels, pixels, #1; \ - movs right_mask, right_mask, lsr #1; \ - bne 4b; \ + vst1.u16 { pixels_low[0] }, [ fb_ptr ]; \ + bal 1f; \ + \ + 5: \ + vst1.u32 { pixels_low[0] }, [ fb_ptr ]; \ + bal 1f; \ + \ + 6: \ + vst1.u32 { pixels_low[0] }, [ fb_ptr ]!; \ + vst1.u16 { pixels_low[2] }, [ fb_ptr ]; \ + bal 1f; \ + \ + 7: \ + vst1.u32 { pixels_low }, [ fb_ptr ]; \ + bal 1f; \ + \ + 8: \ + vst1.u32 { pixels_low }, [ fb_ptr ]!; \ + vst1.u16 { pixels_high[0] }, [ fb_ptr ]; \ + bal 1f; \ + \ + 9: \ + vst1.u32 { pixels_low }, [ fb_ptr ]!; \ + vst1.u32 { pixels_high[0] }, [ fb_ptr ]!; \ + bal 1f; \ + \ + 10: \ + vst1.u32 { pixels_low }, [ fb_ptr ]!; \ + vst1.u32 { pixels_high[0] }, [ fb_ptr ]!; \ + vst1.u16 { pixels_high[2] }, [ fb_ptr ]; \ + bal 1f; \ + \ + 11: \ + vst1.u32 { pixels }, [ fb_ptr ]; \ + bal 1f; \ \ 1: \ add span_uvrg_offset, span_uvrg_offset, #16; \ @@ -2957,6 +3016,8 @@ function(texture_blocks_16bpp) #define psx_gpu r0 #define num_blocks r1 #define color_ptr r2 +#define colors_scalar r2 +#define colors_scalar_compare r3 #define mask_msb_ptr r2 #define block_ptr_load_a r0 @@ -3013,9 +3074,21 @@ function(texture_blocks_16bpp) add mask_msb_ptr, psx_gpu, #psx_gpu_mask_msb_offset; \ vld1.u16 { msb_mask_low[], msb_mask_high[] }, [ mask_msb_ptr, :16 ] \ -#define shade_blocks_textured_modulated_prologue_shaded() \ -#define shade_blocks_textured_modulated_prologue_unshaded() \ +#define shade_blocks_textured_modulated_prologue_shaded(dithering, target) \ + +#define shade_blocks_textured_false_modulation_check_undithered(target) \ + ldr colors_scalar, [ psx_gpu, #psx_gpu_triangle_color_offset ]; \ + movw colors_scalar_compare, #0x8080; \ + \ + movt colors_scalar_compare, #0x80; \ + cmp colors_scalar, colors_scalar_compare; \ + beq shade_blocks_textured_unmodulated_##target \ + +#define shade_blocks_textured_false_modulation_check_dithered(target) \ + +#define shade_blocks_textured_modulated_prologue_unshaded(dithering, target) \ + shade_blocks_textured_false_modulation_check_##dithering(target); \ add color_ptr, psx_gpu, #psx_gpu_triangle_color_offset; \ vld1.u32 { colors_r[] }, [ color_ptr, :32 ]; \ vdup.u8 colors_g, colors_r[1]; \ @@ -3086,13 +3159,13 @@ function(texture_blocks_16bpp) .align 3; \ \ function(shade_blocks_##shading##_textured_modulated_##dithering##_##target) \ + shade_blocks_textured_modulated_prologue_##shading(dithering, target); \ stmdb sp!, { r4 - r5, lr }; \ ldrh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ]; \ \ vld1.u32 { test_mask }, [ psx_gpu, :128 ]; \ \ shade_blocks_textured_modulated_prologue_##target(); \ - shade_blocks_textured_modulated_prologue_##shading(); \ \ add block_ptr_load_a, psx_gpu, #psx_gpu_blocks_offset; \ mov c_32, #32; \ @@ -5271,7 +5344,7 @@ function(update_texture_4bpp_cache) ldrb current_texture_page, [ psx_gpu, #psx_gpu_current_texture_page_offset ] - ldr texture_page_ptr, [ psx_gpu, #psx_gpu_texture_page_ptr_offset ] + ldr texture_page_ptr, [ psx_gpu, #psx_gpu_texture_page_base_offset ] ldr vram_ptr_a, [ psx_gpu, #psx_gpu_vram_ptr_offset ] and current_texture_page_x, current_texture_page, #0xF @@ -5375,7 +5448,7 @@ function(update_texture_8bpp_cache_slice) ldrb current_texture_page, [ psx_gpu, #psx_gpu_current_texture_page_offset ] ldr vram_ptr_a, [ psx_gpu, #psx_gpu_vram_ptr_offset ] - ldr texture_page_ptr, [ psx_gpu, #psx_gpu_texture_page_ptr_offset ] + ldr texture_page_ptr, [ psx_gpu, #psx_gpu_texture_page_base_offset ] mov tile_y, #16 and texture_page_x, texture_page, #0xF diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_main.c b/plugins/gpu_neon/psx_gpu/psx_gpu_main.c index 1eadc794..0f85604f 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_main.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_main.c @@ -20,7 +20,6 @@ extern u32 span_pixels; extern u32 span_pixel_blocks; -extern u32 span_pixel_blocks_unaligned; extern u32 spans; extern u32 triangles; extern u32 sprites; @@ -38,9 +37,6 @@ extern u32 texel_blocks_8bpp; extern u32 texel_blocks_16bpp; extern u32 texel_blocks_untextured; extern u32 blend_blocks; -extern u32 untextured_pixels; -extern u32 blend_pixels; -extern u32 transparent_pixels; extern u32 render_buffer_flushes; extern u32 state_changes; extern u32 trivial_rejects; @@ -49,8 +45,7 @@ extern u32 flat_triangles; extern u32 clipped_triangles; extern u32 zero_block_spans; extern u32 texture_cache_loads; -extern u32 false_modulated_triangles; -extern u32 false_modulated_sprites; +extern u32 false_modulated_blocks; static u32 mismatches; @@ -64,7 +59,7 @@ typedef struct static gpu_dump_struct state; psx_gpu_struct __attribute__((aligned(256))) _psx_gpu; -u16 __attribute__((aligned(256))) _vram[1024 * 512]; +u16 __attribute__((aligned(256))) _vram[(1024 * 512) + 1024]; #define percent_of(numerator, denominator) \ ((((double)(numerator)) / (denominator)) * 100.0) \ @@ -81,7 +76,6 @@ void clear_stats(void) lines = 0; span_pixels = 0; span_pixel_blocks = 0; - span_pixel_blocks_unaligned = 0; spans = 0; texels_4bpp = 0; texels_8bpp = 0; @@ -91,9 +85,6 @@ void clear_stats(void) texel_blocks_8bpp = 0; texel_blocks_16bpp = 0; blend_blocks = 0; - untextured_pixels = 0; - blend_pixels = 0; - transparent_pixels = 0; render_buffer_flushes = 0; state_changes = 0; trivial_rejects = 0; @@ -102,8 +93,7 @@ void clear_stats(void) clipped_triangles = 0; zero_block_spans = 0; texture_cache_loads = 0; - false_modulated_triangles = 0; - false_modulated_sprites = 0; + false_modulated_blocks = 0; } void update_screen(psx_gpu_struct *psx_gpu, SDL_Surface *screen) @@ -165,7 +155,7 @@ int main(int argc, char *argv[]) size = ftell(list_file); fseek(list_file, 0, SEEK_SET); //size = 0; - + list = malloc(size); fread(list, 1, size, list_file); fclose(list_file); @@ -175,44 +165,26 @@ int main(int argc, char *argv[]) SDL_Init(SDL_INIT_EVERYTHING); screen = SDL_SetVideoMode(1024, 512, 32, 0); } - - initialize_psx_gpu(psx_gpu, _vram); #ifdef NEON_BUILD system("ofbset -fb /dev/fb1 -mem 6291456 -en 0"); u32 fbdev_handle = open("/dev/fb1", O_RDWR); - psx_gpu->vram_ptr = (mmap((void *)0x50000000, 1024 * 1024 * 2, PROT_READ | PROT_WRITE, + u16 *vram_ptr = + vram_ptr = (mmap((void *)0x50000000, 1024 * 1024 * 2, PROT_READ | PROT_WRITE, MAP_SHARED | 0xA0000000, fbdev_handle, 0)); - psx_gpu->vram_ptr += 64; -#endif - + vram_ptr += 64; + initialize_psx_gpu(psx_gpu, vram_ptr + 64); +#else + initialize_psx_gpu(psx_gpu, _vram + 64); +#endif #ifdef NEON_BUILD //triangle_benchmark(psx_gpu); //return 0; #endif -#ifdef FULL_COMPARE_MODE - psx_gpu->pixel_count_mode = 1; - psx_gpu->pixel_compare_mode = 0; memcpy(psx_gpu->vram_ptr, state.vram, 1024 * 512 * 2); - //render_block_fill(psx_gpu, 0, 0, 0, 1024, 512); - gpu_parse(psx_gpu, list, size); - - psx_gpu->pixel_count_mode = 0; - psx_gpu->pixel_compare_mode = 1; - memcpy(psx_gpu->compare_vram, state.vram, 1024 * 512 * 2); - memcpy(psx_gpu->vram_ptr, state.vram, 1024 * 512 * 2); - //render_block_fill(psx_gpu, 0, 0, 0, 1024, 512); - clear_stats(); - gpu_parse(psx_gpu, list, size); - flush_render_block_buffer(psx_gpu); -#else - memcpy(psx_gpu->vram_ptr, state.vram, 1024 * 512 * 2); - - psx_gpu->pixel_count_mode = 0; - psx_gpu->pixel_compare_mode = 0; clear_stats(); @@ -232,7 +204,7 @@ int main(int argc, char *argv[]) gpu_parse(psx_gpu, list, size); flush_render_block_buffer(psx_gpu); - printf("%s: ", argv[1]); + printf("%-64s: ", argv[1]); #ifdef NEON_BUILD u32 cycles_elapsed = get_counter() - cycles; @@ -265,17 +237,14 @@ int main(int argc, char *argv[]) } } #endif -#endif #if 0 printf("\n"); - printf(" %d pixels, %d pixel blocks (%d unaligned), %d spans\n" - " (%lf pixels per block (%lf unaligned, r %lf), %lf pixels per span),\n" + printf(" %d pixels, %d pixel blocks, %d spans\n" + " (%lf pixels per block, %lf pixels per span),\n" " %lf blocks per span (%lf per non-zero span), %lf overdraw)\n\n", - span_pixels, span_pixel_blocks, span_pixel_blocks_unaligned, spans, + span_pixels, span_pixel_blocks, spans, (double)span_pixels / span_pixel_blocks, - (double)span_pixels / span_pixel_blocks_unaligned, - (double)span_pixel_blocks / span_pixel_blocks_unaligned, (double)span_pixels / spans, (double)span_pixel_blocks / spans, (double)span_pixel_blocks / (spans - zero_block_spans), @@ -283,10 +252,10 @@ int main(int argc, char *argv[]) ((psx_gpu->viewport_end_x - psx_gpu->viewport_start_x) * (psx_gpu->viewport_end_y - psx_gpu->viewport_start_y))); - printf(" %d triangles (%d false modulated)\n" + printf(" %d triangles\n" " (%d trivial rejects, %lf%% flat, %lf%% left split, %lf%% clipped)\n" " (%lf pixels per triangle, %lf rows per triangle)\n\n", - triangles, false_modulated_triangles, trivial_rejects, + triangles, trivial_rejects, percent_of(flat_triangles, triangles), percent_of(left_split_triangles, triangles), percent_of(clipped_triangles, triangles), @@ -306,6 +275,8 @@ int main(int argc, char *argv[]) percent_of(sprite_blocks, span_pixel_blocks)); printf(" %7d blended blocks (%lf%%)\n", blend_blocks, percent_of(blend_blocks, span_pixel_blocks)); + printf(" %7d false-mod blocks (%lf%%)\n", false_modulated_blocks, + percent_of(false_modulated_blocks, span_pixel_blocks)); printf("\n"); printf(" %lf blocks per render buffer flush\n", (double)span_pixel_blocks / render_buffer_flushes); diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index f6143ee3..fc9f3fba 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -38,61 +38,53 @@ const u8 command_lengths[256] = void update_texture_ptr(psx_gpu_struct *psx_gpu) { + u8 *texture_base; u8 *texture_ptr; switch((psx_gpu->render_state_base >> 8) & 0x3) { default: case TEXTURE_MODE_4BPP: -#ifdef TEXTURE_CACHE_4BPP - texture_ptr = psx_gpu->texture_4bpp_cache[psx_gpu->current_texture_page]; + texture_base = psx_gpu->texture_4bpp_cache[psx_gpu->current_texture_page]; + + texture_ptr = texture_base; texture_ptr += psx_gpu->texture_window_x & 0xF; texture_ptr += (psx_gpu->texture_window_y & 0xF) << 4; texture_ptr += (psx_gpu->texture_window_x >> 4) << 8; texture_ptr += (psx_gpu->texture_window_y >> 4) << 12; -#else - texture_ptr = (u8 *)(psx_gpu->vram_ptr); - texture_ptr += (psx_gpu->current_texture_page & 0xF) * 128; - texture_ptr += ((psx_gpu->current_texture_page >> 4) * 256) * 2048; - texture_ptr += psx_gpu->texture_window_x / 2; - texture_ptr += (psx_gpu->texture_window_y) * 2048; -#endif break; case TEXTURE_MODE_8BPP: -#ifdef TEXTURE_CACHE_8BPP if(psx_gpu->current_texture_page & 0x1) { - texture_ptr = + texture_base = psx_gpu->texture_8bpp_odd_cache[psx_gpu->current_texture_page >> 1]; } else { - texture_ptr = + texture_base = psx_gpu->texture_8bpp_even_cache[psx_gpu->current_texture_page >> 1]; } + texture_ptr = texture_base; + texture_ptr += psx_gpu->texture_window_x & 0xF; texture_ptr += (psx_gpu->texture_window_y & 0xF) << 4; texture_ptr += (psx_gpu->texture_window_x >> 4) << 8; texture_ptr += (psx_gpu->texture_window_y >> 4) << 12; -#else - texture_ptr = (u8 *)(psx_gpu->vram_ptr); - texture_ptr += (psx_gpu->current_texture_page & 0xF) * 128; - texture_ptr += ((psx_gpu->current_texture_page >> 4) * 256) * 2048; - texture_ptr += psx_gpu->texture_window_x; - texture_ptr += (psx_gpu->texture_window_y) * 2048; -#endif break; case TEXTURE_MODE_16BPP: - texture_ptr = (u8 *)(psx_gpu->vram_ptr); - texture_ptr += (psx_gpu->current_texture_page & 0xF) * 128; - texture_ptr += ((psx_gpu->current_texture_page >> 4) * 256) * 2048; + texture_base = (u8 *)(psx_gpu->vram_ptr); + texture_base += (psx_gpu->current_texture_page & 0xF) * 128; + texture_base += ((psx_gpu->current_texture_page >> 4) * 256) * 2048; + + texture_ptr = texture_base; texture_ptr += psx_gpu->texture_window_x * 2; texture_ptr += (psx_gpu->texture_window_y) * 2048; break; } + psx_gpu->texture_page_base = texture_base; psx_gpu->texture_page_ptr = texture_ptr; } @@ -447,8 +439,6 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size) u32 width = list_s16[4] & 0x3FF; u32 height = list_s16[5] & 0x1FF; - psx_gpu->primitive_color = list[0] & 0xFFFFFF; - render_sprite(psx_gpu, x, y, 0, 0, width, height, current_command, list[0]); break; } @@ -461,7 +451,6 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size) u32 width = list_s16[6] & 0x3FF; u32 height = list_s16[7] & 0x1FF; - psx_gpu->primitive_color = list[0] & 0xFFFFFF; set_clut(psx_gpu, list_s16[5]); render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, width, height, @@ -477,8 +466,6 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size) s32 x = list_s16[2] + psx_gpu->offset_x; s32 y = list_s16[3] + psx_gpu->offset_y; - psx_gpu->primitive_color = list[0] & 0xFFFFFF; - render_sprite(psx_gpu, x, y, 0, 0, 1, 1, current_command, list[0]); break; } @@ -491,8 +478,6 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size) s32 x = list_s16[2] + psx_gpu->offset_x; s32 y = list_s16[3] + psx_gpu->offset_y; - psx_gpu->primitive_color = list[0] & 0xFFFFFF; - render_sprite(psx_gpu, x, y, 0, 0, 8, 8, current_command, list[0]); break; } @@ -506,7 +491,6 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size) s32 y = list_s16[3] + psx_gpu->offset_y; u32 uv = list_s16[4]; - psx_gpu->primitive_color = list[0] & 0xFFFFFF; set_clut(psx_gpu, list_s16[5]); render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, 8, 8, @@ -522,7 +506,6 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size) s32 x = list_s16[2] + psx_gpu->offset_x; s32 y = list_s16[3] + psx_gpu->offset_y; - psx_gpu->primitive_color = list[0] & 0xFFFFFF; render_sprite(psx_gpu, x, y, 0, 0, 16, 16, current_command, list[0]); break; } @@ -536,7 +519,6 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size) s32 y = list_s16[3] + psx_gpu->offset_y; u32 uv = list_s16[4]; - psx_gpu->primitive_color = list[0] & 0xFFFFFF; set_clut(psx_gpu, list_s16[5]); render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, 16, 16, -- 2.39.2