X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=blobdiff_plain;f=plugins%2Fgpu_neon%2Fpsx_gpu%2Fpsx_gpu.c;h=cc40748c01448191fd5689b53236d7b0b7f75817;hp=f29fa7e75c85a00e8598e4b2da2f24ddb7dd4124;hb=f9248bbfa31729f0d902db00269e50f2d03082ba;hpb=e8c0e0bb6288aeeb2a4cb6709608340836778886 diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index f29fa7e7..cc40748c 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -20,7 +20,6 @@ u32 span_pixels = 0; u32 span_pixel_blocks = 0; -u32 span_pixel_blocks_unaligned = 0; u32 spans = 0; u32 triangles = 0; u32 sprites = 0; @@ -39,9 +38,6 @@ u32 texel_blocks_8bpp = 0; u32 texel_blocks_16bpp = 0; u32 texel_blocks_untextured = 0; u32 blend_blocks = 0; -u32 untextured_pixels = 0; -u32 blend_pixels = 0; -u32 transparent_pixels = 0; u32 render_buffer_flushes = 0; u32 state_changes = 0; u32 left_split_triangles = 0; @@ -49,8 +45,7 @@ u32 flat_triangles = 0; u32 clipped_triangles = 0; u32 zero_block_spans = 0; u32 texture_cache_loads = 0; -u32 false_modulated_triangles = 0; -u32 false_modulated_sprites = 0; +u32 false_modulated_blocks = 0; u32 reciprocal_table[512]; @@ -91,7 +86,7 @@ struct render_block_handler_struct blend_blocks_function_type *blend_blocks; }; -#ifndef PANDORA_BUILD +#ifndef NEON_BUILD u32 fixed_reciprocal(u32 denominator, u32 *_shift) { @@ -241,6 +236,7 @@ u32 invalidate_texture_cache_region_viewport(psx_gpu_struct *psx_gpu, u32 x1, { u32 mask = texture_region_mask(x1, y1, x2, y2) & psx_gpu->viewport_mask; + psx_gpu->dirty_textures_4bpp_mask |= mask; psx_gpu->dirty_textures_8bpp_mask |= mask; psx_gpu->dirty_textures_8bpp_alternate_mask |= mask; @@ -252,12 +248,12 @@ u32 invalidate_texture_cache_region_viewport(psx_gpu_struct *psx_gpu, u32 x1, void update_texture_8bpp_cache_slice(psx_gpu_struct *psx_gpu, u32 texture_page); -#ifndef PANDORA_BUILD +#ifndef NEON_BUILD void update_texture_4bpp_cache(psx_gpu_struct *psx_gpu) { u32 current_texture_page = psx_gpu->current_texture_page; - u8 *texture_page_ptr = psx_gpu->texture_page_ptr; + u8 *texture_page_ptr = psx_gpu->texture_page_base; u16 *vram_ptr = psx_gpu->vram_ptr; u32 texel_block; @@ -285,6 +281,7 @@ void update_texture_4bpp_cache(psx_gpu_struct *psx_gpu) while(sub_x) { texel_block = *vram_ptr; + texture_page_ptr[0] = texel_block & 0xF; texture_page_ptr[1] = (texel_block >> 4) & 0xF; texture_page_ptr[2] = (texel_block >> 8) & 0xF; @@ -319,7 +316,7 @@ void update_texture_4bpp_cache(psx_gpu_struct *psx_gpu) void update_texture_8bpp_cache_slice(psx_gpu_struct *psx_gpu, u32 texture_page) { - u16 *texture_page_ptr = psx_gpu->texture_page_ptr; + u16 *texture_page_ptr = psx_gpu->texture_page_base; u16 *vram_ptr = psx_gpu->vram_ptr; u32 tile_x, tile_y; @@ -413,8 +410,10 @@ void flush_render_block_buffer(psx_gpu_struct *psx_gpu) render_block_handler->shade_blocks(psx_gpu); render_block_handler->blend_blocks(psx_gpu); +#ifdef PROFILE span_pixel_blocks += psx_gpu->num_blocks; render_buffer_flushes++; +#endif psx_gpu->num_blocks = 0; } @@ -424,7 +423,7 @@ void flush_render_block_buffer(psx_gpu_struct *psx_gpu) void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, vertex_struct *b, vertex_struct *c); -#ifndef PANDORA_BUILD +#ifndef NEON_BUILD #define setup_gradient_calculation_input(set, vertex) \ /* First type is: uvrg bxxx xxxx */\ @@ -1110,7 +1109,7 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, vertex_struct *v_b, vertex_struct *v_c); -#ifndef PANDORA_BUILD +#ifndef NEON_BUILD void setup_spans_up_left(psx_gpu_struct *psx_gpu, vertex_struct *v_a, vertex_struct *v_b, vertex_struct *v_c) @@ -1748,6 +1747,8 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, } \ #define setup_blocks_add_blocks_direct() \ + texel_blocks_untextured += span_num_blocks; \ + span_pixel_blocks += span_num_blocks \ #define setup_blocks_builder(shading, texturing, dithering, sw, target) \ @@ -1787,7 +1788,6 @@ void setup_blocks_##shading##_##texturing##_##dithering##_##sw##_##target( \ s32 pixel_span = span_num_blocks * 8; \ pixel_span -= __builtin_popcount(span_edge_data->right_mask & 0xFF); \ span_pixels += pixel_span; \ - span_pixel_blocks_unaligned += (pixel_span + 7) / 8; \ \ span_num_blocks--; \ while(span_num_blocks) \ @@ -1848,7 +1848,7 @@ void setup_blocks_unshaded_textured_dithered_swizzled_indirect(psx_gpu_struct //setup_blocks_builder(unshaded, untextured, undithered, unswizzled, direct); -#ifndef PANDORA_BUILD +#ifndef NEON_BUILD setup_blocks_builder(shaded, textured, dithered, swizzled, indirect); setup_blocks_builder(shaded, textured, dithered, unswizzled, indirect); @@ -1871,7 +1871,7 @@ void texture_blocks_4bpp(psx_gpu_struct *psx_gpu); void texture_blocks_8bpp(psx_gpu_struct *psx_gpu); void texture_blocks_16bpp(psx_gpu_struct *psx_gpu); -#ifndef PANDORA_BUILD +#ifndef NEON_BUILD void texture_blocks_untextured(psx_gpu_struct *psx_gpu) { @@ -2017,16 +2017,33 @@ void texture_blocks_16bpp(psx_gpu_struct *psx_gpu) } \ -#define shade_blocks_textured_modulated_shaded_primitive_load() \ +#define shade_blocks_textured_false_modulated_check_dithered(target) \ + if(psx_gpu->triangle_color == 0x808080) \ + { \ + false_modulated_blocks += num_blocks; \ + } \ + +#define shade_blocks_textured_false_modulated_check_undithered(target) \ + if(psx_gpu->triangle_color == 0x808080) \ + { \ + \ + shade_blocks_textured_unmodulated_##target(psx_gpu); \ + false_modulated_blocks += num_blocks; \ + return; \ + } \ + + +#define shade_blocks_textured_modulated_shaded_primitive_load(dithering, \ + target) \ -#define shade_blocks_textured_modulated_unshaded_primitive_load() \ +#define shade_blocks_textured_modulated_unshaded_primitive_load(dithering, \ + target) \ { \ u32 color = psx_gpu->triangle_color; \ dup_8x8b(colors_r, color); \ dup_8x8b(colors_g, color >> 8); \ dup_8x8b(colors_b, color >> 16); \ - if(psx_gpu->triangle_color == 0x808080) \ - false_modulated_triangles++; \ + shade_blocks_textured_false_modulated_check_##dithering(target); \ } \ #define shade_blocks_textured_modulated_shaded_block_load() \ @@ -2091,7 +2108,8 @@ void shade_blocks_##shading##_textured_modulated_##dithering##_##target( \ \ dup_8x16b(d128_0x8000, 0x8000); \ \ - shade_blocks_textured_modulated_##shading##_primitive_load(); \ + shade_blocks_textured_modulated_##shading##_primitive_load(dithering, \ + target); \ \ while(num_blocks) \ { \ @@ -2157,7 +2175,10 @@ void shade_blocks_unshaded_textured_modulated_dithered_indirect(psx_gpu_struct void shade_blocks_unshaded_textured_modulated_undithered_indirect(psx_gpu_struct *psx_gpu); -#ifndef PANDORA_BUILD +void shade_blocks_textured_unmodulated_indirect(psx_gpu_struct *psx_gpu); +void shade_blocks_textured_unmodulated_direct(psx_gpu_struct *psx_gpu); + +#ifndef NEON_BUILD shade_blocks_textured_modulated_builder(shaded, dithered, direct); shade_blocks_textured_modulated_builder(shaded, undithered, direct); @@ -2204,10 +2225,40 @@ void shade_blocks_textured_unmodulated_##target(psx_gpu_struct *psx_gpu) \ } \ } \ -void shade_blocks_textured_unmodulated_indirect(psx_gpu_struct *psx_gpu); -void shade_blocks_textured_unmodulated_direct(psx_gpu_struct *psx_gpu); +#define shade_blocks_textured_unmodulated_dithered_builder(target) \ +void shade_blocks_textured_unmodulated_dithered_##target(psx_gpu_struct \ + *psx_gpu) \ +{ \ + block_struct *block = psx_gpu->blocks; \ + u32 num_blocks = psx_gpu->num_blocks; \ + vec_8x16u draw_mask; \ + vec_8x16u test_mask = psx_gpu->test_mask; \ + u32 draw_mask_bits; \ + \ + vec_8x16u pixels; \ + shade_blocks_load_msb_mask_##target(); \ + \ + while(num_blocks) \ + { \ + vec_8x16u zero_mask; \ + \ + draw_mask_bits = block->draw_mask_bits; \ + dup_8x16b(draw_mask, draw_mask_bits); \ + tst_8x16b(draw_mask, draw_mask, test_mask); \ + \ + pixels = block->texels; \ + \ + cmpeqz_8x16b(zero_mask, pixels); \ + or_8x16b(zero_mask, draw_mask, zero_mask); \ + \ + shade_blocks_store_##target(zero_mask, pixels); \ + \ + num_blocks--; \ + block++; \ + } \ +} \ -#ifndef PANDORA_BUILD +#ifndef NEON_BUILD shade_blocks_textured_unmodulated_builder(indirect) shade_blocks_textured_unmodulated_builder(direct) @@ -2218,8 +2269,8 @@ shade_blocks_textured_unmodulated_builder(direct) void shade_blocks_unshaded_untextured_indirect(psx_gpu_struct *psx_gpu); void shade_blocks_unshaded_untextured_direct(psx_gpu_struct *psx_gpu); -#ifndef PANDORA_BUILD - +#ifndef NEON_BUILD + void shade_blocks_unshaded_untextured_indirect(psx_gpu_struct *psx_gpu) { } @@ -2452,7 +2503,7 @@ void blend_blocks_untextured_add_fourth_on(psx_gpu_struct *psx_gpu); void blend_blocks_textured_unblended_off(psx_gpu_struct *psx_gpu); void blend_blocks_textured_unblended_on(psx_gpu_struct *psx_gpu); -#ifndef PANDORA_BUILD +#ifndef NEON_BUILD void blend_blocks_textured_unblended_off(psx_gpu_struct *psx_gpu) { @@ -2760,8 +2811,6 @@ char *render_block_flag_strings[] = (triangle_y_direction_##direction_c << 4) | \ (triangle_winding_##winding << 6)) \ -psx_gpu_struct __attribute__((aligned(64))) psx_gpu_alt; - void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 flags) { @@ -2775,11 +2824,15 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, triangle_area = triangle_signed_area_x2(a->x, a->y, b->x, b->y, c->x, c->y); +#ifdef PROFILE triangles++; +#endif if(triangle_area == 0) { +#ifdef PROFILE trivial_rejects++; +#endif return; } @@ -2799,7 +2852,9 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, if((y_bottom - y_top) >= 512) { +#ifdef PROFILE trivial_rejects++; +#endif return; } @@ -2823,14 +2878,18 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, if((c->x - a->x) >= 1024) { +#ifdef PROFILE trivial_rejects++; +#endif return; } if(invalidate_texture_cache_region_viewport(psx_gpu, a->x, y_top, c->x, y_bottom) == 0) { +#ifdef PROFILE trivial_rejects++; +#endif return; } @@ -2924,7 +2983,9 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, break; } +#ifdef PROFILE spans += psx_gpu->num_spans; +#endif u32 render_state = flags & (RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND | @@ -2936,7 +2997,9 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, { psx_gpu->render_state = render_state; flush_render_block_buffer(psx_gpu); +#ifdef PROFILE state_changes++; +#endif } psx_gpu->primitive_type = PRIMITIVE_TYPE_TRIANGLE; @@ -2950,7 +3013,7 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu); -#ifndef PANDORA_BUILD +#ifndef NEON_BUILD void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu) { @@ -3361,7 +3424,7 @@ void setup_sprite_8bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, s32 width, s32 height, u32 color); -#ifndef PANDORA_BUILD +#ifndef NEON_BUILD setup_sprite_tiled_builder(4bpp); setup_sprite_tiled_builder(8bpp); @@ -3371,7 +3434,7 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, u32 left_offset = u & 0x7; u32 width_rounded = width + left_offset + 7; - u16 *fb_ptr = psx_gpu->vram_ptr + (y * 1024) + (x - left_offset); + u16 *fb_ptr = psx_gpu->vram_ptr + (y * 1024) + (s32)(x - left_offset); u32 right_width = width_rounded & 0x7; u32 block_width = width_rounded / 8; u32 fb_ptr_pitch = (1024 + 8) - (block_width * 8); @@ -3521,7 +3584,10 @@ void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, { blocks_remaining = block_width - 1; num_blocks += block_width; + +#ifdef PROFILE sprite_blocks += block_width; +#endif if(num_blocks > MAX_BLOCKS) { @@ -3656,6 +3722,10 @@ void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, s32 x_right = x + width - 1; s32 y_bottom = y + height - 1; +#ifdef PROFILE + sprites++; +#endif + if(invalidate_texture_cache_region_viewport(psx_gpu, x, y, x_right, y_bottom) == 0) { @@ -3687,23 +3757,25 @@ void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, if((width <= 0) || (height <= 0)) return; - sprites++; - +#ifdef PROFILE span_pixels += width * height; spans += height; +#endif u32 render_state = flags & (RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND | RENDER_FLAGS_TEXTURE_MAP); render_state |= (psx_gpu->render_state_base & ~RENDER_STATE_DITHER); - + if((psx_gpu->render_state != render_state) || (psx_gpu->primitive_type != PRIMITIVE_TYPE_SPRITE)) { psx_gpu->render_state = render_state; flush_render_block_buffer(psx_gpu); +#ifdef PROFILE state_changes++; +#endif } psx_gpu->primitive_type = PRIMITIVE_TYPE_SPRITE; @@ -3888,9 +3960,18 @@ void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, #define set_line_gradients(minor) \ { \ s32 gradient_divisor = delta_##minor; \ - gradient_r = int_to_fixed(vertex_b->r - vertex_a->r) / gradient_divisor; \ - gradient_g = int_to_fixed(vertex_b->g - vertex_a->g) / gradient_divisor; \ - gradient_b = int_to_fixed(vertex_b->b - vertex_a->b) / gradient_divisor; \ + if(gradient_divisor != 0) \ + { \ + gradient_r = int_to_fixed(vertex_b->r - vertex_a->r) / gradient_divisor; \ + gradient_g = int_to_fixed(vertex_b->g - vertex_a->g) / gradient_divisor; \ + gradient_b = int_to_fixed(vertex_b->b - vertex_a->b) / gradient_divisor; \ + } \ + else \ + { \ + gradient_r = 0; \ + gradient_g = 0; \ + gradient_b = 0; \ + } \ current_r = fixed_center(vertex_a->r); \ current_g = fixed_center(vertex_a->g); \ current_b = fixed_center(vertex_a->b); \ @@ -4025,7 +4106,9 @@ void render_line(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 flags, u32 control_mask; +#ifdef PROFILE lines++; +#endif if(vertex_a->x >= vertex_b->x) { @@ -4233,9 +4316,12 @@ void render_line(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 flags, void render_block_fill(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y, u32 width, u32 height) { + if((width == 0) || (height == 0)) + return; + invalidate_texture_cache_region(psx_gpu, x, y, x + width - 1, y + height - 1); -#ifndef PANDORA_BUILD +#ifndef NEON_BUILD u32 r = color & 0xFF; u32 g = (color >> 8) & 0xFF; u32 b = (color >> 16) & 0xFF; @@ -4267,9 +4353,10 @@ void render_block_copy(psx_gpu_struct *psx_gpu, u16 *source, u32 x, u32 y, u16 *vram_ptr = psx_gpu->vram_ptr + x + (y * 1024); u32 draw_x, draw_y; - invalidate_texture_cache_region(psx_gpu, x, y, x + width - 1, y + height - 1); + if((width == 0) || (height == 0)) + return; - //printf("copy for %d, %d\n", width, height); + invalidate_texture_cache_region(psx_gpu, x, y, x + width - 1, y + height - 1); for(draw_y = 0; draw_y < height; draw_y++) { @@ -4322,15 +4409,6 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram) psx_gpu->test_mask = test_mask; - psx_gpu->pixel_count_mode = 0; - psx_gpu->pixel_compare_mode = 0; - - psx_gpu->vram_pixel_counts_a = malloc(sizeof(u8) * 1024 * 512); - psx_gpu->vram_pixel_counts_b = malloc(sizeof(u8) * 1024 * 512); - memset(psx_gpu->vram_pixel_counts_a, 0, sizeof(u8) * 1024 * 512); - memset(psx_gpu->vram_pixel_counts_b, 0, sizeof(u8) * 1024 * 512); - psx_gpu->compare_vram = malloc(sizeof(u16) * 1024 * 512); - psx_gpu->dirty_textures_4bpp_mask = 0xFFFFFFFF; psx_gpu->dirty_textures_8bpp_mask = 0xFFFFFFFF; psx_gpu->dirty_textures_8bpp_alternate_mask = 0xFFFFFFFF; @@ -4347,11 +4425,17 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram) psx_gpu->vram_ptr = vram; + psx_gpu->texture_page_base = psx_gpu->vram_ptr; psx_gpu->texture_page_ptr = psx_gpu->vram_ptr; psx_gpu->clut_ptr = psx_gpu->vram_ptr; psx_gpu->mask_msb = 0; + psx_gpu->texture_window_x = 0; + psx_gpu->texture_window_y = 0; + psx_gpu->texture_mask_width = 0xFF; + psx_gpu->texture_mask_height = 0xFF; + memset(psx_gpu->vram_ptr, 0, sizeof(u16) * 1024 * 512); initialize_reciprocal_table(); @@ -4367,7 +4451,6 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram) // d1: (2 3 6 7): y0 // d2: (4 5 6 7): x0 ^ y0 - psx_gpu->dither_table[0] = dither_table_row(-4, 0, -3, 1); psx_gpu->dither_table[1] = dither_table_row(2, -2, 3, -1); psx_gpu->dither_table[2] = dither_table_row(-3, 1, -4, 0); @@ -4384,7 +4467,7 @@ u64 get_us(void) return (tv.tv_sec * 1000000ULL) + tv.tv_usec; } -#ifdef PANDORA_BUILD +#ifdef NEON_BUILD u32 get_counter() {