/*
 * Flattened gitweb "blobdiff_plain" export of one change to
 * plugins/gpu_neon/psx_gpu/psx_gpu.c in pcsx_rearmed.git
 * (blob ce72af55 -> a79254da). The original line breaks are gone, so this
 * text documents the change; it is not directly applicable as a patch.
 *
 * What the hunks change:
 *  - Includes: adds "vector_ops.h" (only when NEON_BUILD is not defined) and
 *    "psx_gpu_simd.h" after "common.h".
 *  - Removes the in-file forward declarations of
 *    update_texture_8bpp_cache_slice, compute_all_gradients, the setup_spans,
 *    setup_blocks, texture_blocks, shade_blocks and blend_blocks families,
 *    texture_sprite_blocks_8bpp and the setup_sprite family. Presumably they
 *    are now declared in psx_gpu_simd.h; confirm against that header.
 *  - Drops the "#endif" / "#ifndef NEON_BUILD" pairs that only bracketed
 *    those declarations, so neighbouring non-NEON regions become contiguous.
 *  - setup_spans_debug_check (the variant under #ifndef NDEBUG): additionally
 *    traps when the span index computed from span_edge_data exceeds MAX_SPANS.
 *  - setup_spans_up_down: when num_spans + height_minor_b equals MAX_SPANS,
 *    height_minor_b is rounded down to a multiple of 4 (the patch itself marks
 *    this FIXME); the do/while becomes a while, so it may run zero times.
 *  - Triangle and sprite block handler tables: the fourth texture-mode entry
 *    changes from 4bpp to 16bpp.
 *  - setup_sprite_untextured: now sits inside the #ifndef NEON_BUILD region
 *    and returns early through setup_sprite_untextured_simple when none of
 *    RENDER_STATE_MASK_EVALUATE, RENDER_FLAGS_MODULATE_TEXELS,
 *    RENDER_FLAGS_BLEND is set in render_state and RENDER_INTERLACE_ENABLED
 *    is clear in render_mode.
 *  - setup_sprite_untextured_simple (new, placed after the #endif): packs the
 *    24-bit color to 5:5:5, ORs in psx_gpu->mask_msb, and fills the rectangle
 *    in vram_out_ptr row by row (row stride 1024), using 32-bit stores once
 *    the row pointer is 4-byte aligned.
 *  - draw_pixel_line_mask_evaluate_yes: the guard now passes when bit 0x8000
 *    of *vram_ptr is clear (previously when it was set).
 *  - initialize_psx_gpu: stores reciprocal_table in
 *    psx_gpu->reciprocal_table_ptr right after initialize_reciprocal_table().
 *  - The region starting at get_counter is disabled: "#ifdef NEON_BUILD"
 *    becomes "#if 0".
 *
 * NOTE(review): setup_sprite_untextured_simple flushes pending blocks only
 * when num_blocks > MAX_BLOCKS, yet it writes VRAM directly. Confirm whether
 * any non-zero num_blocks should be flushed first to keep draw order.
 * NOTE(review): num_width is u32 and is decremented for an unaligned row
 * start before any width check; width == 0 would wrap. Confirm callers never
 * pass a zero width.
 */
X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=plugins%2Fgpu_neon%2Fpsx_gpu%2Fpsx_gpu.c;h=a79254da9f17bd496c993364d1157e3892e1abd6;hb=37725e8cc9157e2e7819538ee5c98279b8dbefff;hp=ce72af55f54b2db7e71a74ea95334a88d164d27a;hpb=c111e8f8fb8a0d3bd7b05c743a48d942e107cc79;p=pcsx_rearmed.git diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index ce72af55..a79254da 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -17,6 +17,10 @@ #include #include "common.h" +#ifndef NEON_BUILD +#include "vector_ops.h" +#endif +#include "psx_gpu_simd.h" u32 span_pixels = 0; u32 span_pixel_blocks = 0; @@ -298,9 +302,6 @@ void update_texture_cache_region(psx_gpu_struct *psx_gpu, u32 x1, u32 y1, } } -void update_texture_8bpp_cache_slice(psx_gpu_struct *psx_gpu, - u32 texture_page); - #ifndef NEON_BUILD void update_texture_4bpp_cache(psx_gpu_struct *psx_gpu) @@ -449,9 +450,6 @@ void update_texture_8bpp_cache(psx_gpu_struct *psx_gpu) } } -void setup_blocks_shaded_untextured_undithered_unswizzled_indirect( - psx_gpu_struct *psx_gpu); - void flush_render_block_buffer(psx_gpu_struct *psx_gpu) { if((psx_gpu->render_mode & RENDER_INTERLACE_ENABLED) && @@ -515,9 +513,6 @@ void flush_render_block_buffer(psx_gpu_struct *psx_gpu) } -void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, - vertex_struct *b, vertex_struct *c); - #ifndef NEON_BUILD #define setup_gradient_calculation_input(set, vertex) \ @@ -769,13 +764,18 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, #ifndef NDEBUG #define setup_spans_debug_check(span_edge_data_element) \ - if (&span_edge_data_element - psx_gpu->span_edge_data < psx_gpu->num_spans) \ +{ \ + u32 _num_spans = &span_edge_data_element - psx_gpu->span_edge_data; \ + if (_num_spans > MAX_SPANS) \ + *(int *)0 = 1; \ + if (_num_spans < psx_gpu->num_spans) \ { \ if(span_edge_data_element.num_blocks > MAX_BLOCKS_PER_ROW) \ *(int *)0 = 1; \ 
if(span_edge_data_element.y > 2048) \ *(int *)0 = 1; \ } \ +} \ #else #define setup_spans_debug_check(span_edge_data_element) \ @@ -1200,26 +1200,6 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, setup_spans_up(index_##major, index_##minor, minor, yes) \ -void setup_spans_up_left(psx_gpu_struct *psx_gpu, vertex_struct *v_a, - vertex_struct *v_b, vertex_struct *v_c); -void setup_spans_up_right(psx_gpu_struct *psx_gpu, vertex_struct *v_a, - vertex_struct *v_b, vertex_struct *v_c); -void setup_spans_down_left(psx_gpu_struct *psx_gpu, vertex_struct *v_a, - vertex_struct *v_b, vertex_struct *v_c); -void setup_spans_down_right(psx_gpu_struct *psx_gpu, vertex_struct *v_a, - vertex_struct *v_b, vertex_struct *v_c); -void setup_spans_up_a(psx_gpu_struct *psx_gpu, vertex_struct *v_a, - vertex_struct *v_b, vertex_struct *v_c); -void setup_spans_up_b(psx_gpu_struct *psx_gpu, vertex_struct *v_a, - vertex_struct *v_b, vertex_struct *v_c); -void setup_spans_down_a(psx_gpu_struct *psx_gpu, vertex_struct *v_a, - vertex_struct *v_b, vertex_struct *v_c); -void setup_spans_down_b(psx_gpu_struct *psx_gpu, vertex_struct *v_a, - vertex_struct *v_b, vertex_struct *v_c); -void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, - vertex_struct *v_b, vertex_struct *v_c); - - #ifndef NEON_BUILD void setup_spans_up_left(psx_gpu_struct *psx_gpu, vertex_struct *v_a, @@ -1423,12 +1403,16 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, y_x4.e[3] = y_a + 3; setup_spans_adjust_edges_alternate_no(index_left, index_right); + // FIXME: overflow corner case + if(psx_gpu->num_spans + height_minor_b == MAX_SPANS) + height_minor_b &= ~3; + psx_gpu->num_spans += height_minor_b; - do + while(height_minor_b > 0) { setup_spans_set_x4(none, down, no); height_minor_b -= 4; - } while(height_minor_b > 0); + } } left_split_triangles++; @@ -1932,30 +1916,6 @@ void setup_blocks_##shading##_##texturing##_##dithering##_##sw##_##target( \ 
psx_gpu->num_blocks = num_blocks; \ } \ -void setup_blocks_shaded_textured_dithered_unswizzled_indirect(psx_gpu_struct - *psx_gpu); - -void setup_blocks_shaded_untextured_dithered_unswizzled_indirect(psx_gpu_struct - *psx_gpu); -void setup_blocks_shaded_untextured_undithered_unswizzled_indirect( - psx_gpu_struct *psx_gpu); -void setup_blocks_shaded_untextured_dithered_unswizzled_direct(psx_gpu_struct - *psx_gpu); -void setup_blocks_shaded_untextured_undithered_unswizzled_direct( - psx_gpu_struct *psx_gpu); - -void setup_blocks_unshaded_textured_dithered_unswizzled_indirect(psx_gpu_struct - *psx_gpu); -void setup_blocks_unshaded_untextured_undithered_unswizzled_indirect( - psx_gpu_struct *psx_gpu); -void setup_blocks_unshaded_untextured_undithered_unswizzled_direct( - psx_gpu_struct *psx_gpu); - -void setup_blocks_shaded_textured_dithered_swizzled_indirect(psx_gpu_struct - *psx_gpu); -void setup_blocks_unshaded_textured_dithered_swizzled_indirect(psx_gpu_struct - *psx_gpu); - //setup_blocks_builder(unshaded, untextured, undithered, unswizzled, direct); @@ -1975,15 +1935,6 @@ setup_blocks_builder(shaded, untextured, dithered, unswizzled, direct); setup_blocks_builder(unshaded, untextured, undithered, unswizzled, indirect); setup_blocks_builder(unshaded, untextured, undithered, unswizzled, direct); -#endif - -void texture_blocks_untextured(psx_gpu_struct *psx_gpu); -void texture_blocks_4bpp(psx_gpu_struct *psx_gpu); -void texture_blocks_8bpp(psx_gpu_struct *psx_gpu); -void texture_blocks_16bpp(psx_gpu_struct *psx_gpu); - -#ifndef NEON_BUILD - void texture_blocks_untextured(psx_gpu_struct *psx_gpu) { if(psx_gpu->primitive_type != PRIMITIVE_TYPE_SPRITE) @@ -2268,27 +2219,6 @@ void shade_blocks_##shading##_textured_modulated_##dithering##_##target( \ } \ } \ -void shade_blocks_shaded_textured_modulated_dithered_direct(psx_gpu_struct - *psx_gpu); -void shade_blocks_shaded_textured_modulated_undithered_direct(psx_gpu_struct - *psx_gpu); -void 
shade_blocks_unshaded_textured_modulated_dithered_direct(psx_gpu_struct - *psx_gpu); -void shade_blocks_unshaded_textured_modulated_undithered_direct(psx_gpu_struct - *psx_gpu); - -void shade_blocks_shaded_textured_modulated_dithered_indirect(psx_gpu_struct - *psx_gpu); -void shade_blocks_shaded_textured_modulated_undithered_indirect(psx_gpu_struct - *psx_gpu); -void shade_blocks_unshaded_textured_modulated_dithered_indirect(psx_gpu_struct - *psx_gpu); -void shade_blocks_unshaded_textured_modulated_undithered_indirect(psx_gpu_struct - *psx_gpu); - -void shade_blocks_textured_unmodulated_indirect(psx_gpu_struct *psx_gpu); -void shade_blocks_textured_unmodulated_direct(psx_gpu_struct *psx_gpu); - #ifndef NEON_BUILD shade_blocks_textured_modulated_builder(shaded, dithered, direct); @@ -2374,14 +2304,6 @@ void shade_blocks_textured_unmodulated_dithered_##target(psx_gpu_struct \ shade_blocks_textured_unmodulated_builder(indirect) shade_blocks_textured_unmodulated_builder(direct) -#endif - - -void shade_blocks_unshaded_untextured_indirect(psx_gpu_struct *psx_gpu); -void shade_blocks_unshaded_untextured_direct(psx_gpu_struct *psx_gpu); - -#ifndef NEON_BUILD - void shade_blocks_unshaded_untextured_indirect(psx_gpu_struct *psx_gpu) { } @@ -2593,27 +2515,6 @@ void \ } \ } \ -void blend_blocks_textured_average_off(psx_gpu_struct *psx_gpu); -void blend_blocks_textured_average_on(psx_gpu_struct *psx_gpu); -void blend_blocks_textured_add_off(psx_gpu_struct *psx_gpu); -void blend_blocks_textured_add_on(psx_gpu_struct *psx_gpu); -void blend_blocks_textured_subtract_off(psx_gpu_struct *psx_gpu); -void blend_blocks_textured_subtract_on(psx_gpu_struct *psx_gpu); -void blend_blocks_textured_add_fourth_off(psx_gpu_struct *psx_gpu); -void blend_blocks_textured_add_fourth_on(psx_gpu_struct *psx_gpu); - -void blend_blocks_untextured_average_off(psx_gpu_struct *psx_gpu); -void blend_blocks_untextured_average_on(psx_gpu_struct *psx_gpu); -void blend_blocks_untextured_add_off(psx_gpu_struct 
*psx_gpu); -void blend_blocks_untextured_add_on(psx_gpu_struct *psx_gpu); -void blend_blocks_untextured_subtract_off(psx_gpu_struct *psx_gpu); -void blend_blocks_untextured_subtract_on(psx_gpu_struct *psx_gpu); -void blend_blocks_untextured_add_fourth_off(psx_gpu_struct *psx_gpu); -void blend_blocks_untextured_add_fourth_on(psx_gpu_struct *psx_gpu); - -void blend_blocks_textured_unblended_off(psx_gpu_struct *psx_gpu); -void blend_blocks_textured_unblended_on(psx_gpu_struct *psx_gpu); - #ifndef NEON_BUILD void blend_blocks_textured_unblended_off(psx_gpu_struct *psx_gpu) @@ -2876,7 +2777,7 @@ blend_blocks_builder(textured, unblended, on); render_blocks_switch_block_texture_mode(4bpp), \ render_blocks_switch_block_texture_mode(8bpp), \ render_blocks_switch_block_texture_mode(16bpp), \ - render_blocks_switch_block_texture_mode(4bpp) \ + render_blocks_switch_block_texture_mode(16bpp) \ render_block_handler_struct render_triangle_block_handlers[] = @@ -3166,9 +3067,6 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, render_triangle_p(psx_gpu, vertex_ptrs, flags); } - -void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu); - #ifndef NEON_BUILD void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu) @@ -3871,20 +3769,6 @@ void setup_sprite_##texture_mode##x4mode(psx_gpu_struct *psx_gpu, s32 x, s32 y,\ } \ } \ -void setup_sprite_4bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, - s32 width, s32 height, u32 color); -void setup_sprite_8bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, - s32 width, s32 height, u32 color); -void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, - s32 width, s32 height, u32 color); - -void setup_sprite_4bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, - s32 width, s32 height, u32 color); -void setup_sprite_8bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, - s32 width, s32 height, u32 color); -void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, 
s32 u, s32 v, - s32 width, s32 height, u32 color); - #ifndef NEON_BUILD setup_sprite_tiled_builder(4bpp,); setup_sprite_tiled_builder(8bpp,); @@ -4013,11 +3897,17 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, } } -#endif - void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, s32 width, s32 height, u32 color) { + if((psx_gpu->render_state & (RENDER_STATE_MASK_EVALUATE | + RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND)) == 0 && + (psx_gpu->render_mode & RENDER_INTERLACE_ENABLED) == 0) + { + setup_sprite_untextured_simple(psx_gpu, x, y, u, v, width, height, color); + return; + } + u32 right_width = ((width - 1) & 0x7) + 1; u32 right_mask_bits = (0xFF << right_width); u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + x; @@ -4083,6 +3973,66 @@ void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, } } +#endif + +void setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y, + s32 u, s32 v, s32 width, s32 height, u32 color) +{ + u32 r = color & 0xFF; + u32 g = (color >> 8) & 0xFF; + u32 b = (color >> 16) & 0xFF; + u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10) | + psx_gpu->mask_msb; + u32 color_32bpp = color_16bpp | (color_16bpp << 16); + + u16 *vram_ptr16 = psx_gpu->vram_out_ptr + x + (y * 1024); + u32 *vram_ptr; + + u32 num_width; + + if(psx_gpu->num_blocks > MAX_BLOCKS) + { + flush_render_block_buffer(psx_gpu); + } + + while(height) + { + num_width = width; + + vram_ptr = (void *)vram_ptr16; + if((long)vram_ptr16 & 2) + { + *vram_ptr16 = color_32bpp; + vram_ptr = (void *)(vram_ptr16 + 1); + num_width--; + } + + while(num_width >= 4 * 2) + { + vram_ptr[0] = color_32bpp; + vram_ptr[1] = color_32bpp; + vram_ptr[2] = color_32bpp; + vram_ptr[3] = color_32bpp; + + vram_ptr += 4; + num_width -= 4 * 2; + } + + while(num_width >= 2) + { + *vram_ptr++ = color_32bpp; + num_width -= 2; + } + + if(num_width > 0) + { + *(u16 *)vram_ptr = color_32bpp; + } + + vram_ptr16 += 
1024; + height--; + } +} #define setup_sprite_blocks_switch_textured(texture_mode) \ @@ -4171,7 +4121,7 @@ void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, render_sprite_blocks_switch_block_texture_mode(4bpp), \ render_sprite_blocks_switch_block_texture_mode(8bpp), \ render_sprite_blocks_switch_block_texture_mode(16bpp), \ - render_sprite_blocks_switch_block_texture_mode(4bpp) \ + render_sprite_blocks_switch_block_texture_mode(16bpp) \ render_block_handler_struct render_sprite_block_handlers[] = @@ -4264,7 +4214,7 @@ void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, } #define draw_pixel_line_mask_evaluate_yes() \ - if(*vram_ptr & 0x8000) \ + if((*vram_ptr & 0x8000) == 0) \ #define draw_pixel_line_mask_evaluate_no() \ @@ -4976,6 +4926,7 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram) memset(psx_gpu->vram_ptr, 0, sizeof(u16) * 1024 * 512); initialize_reciprocal_table(); + psx_gpu->reciprocal_table_ptr = reciprocal_table; // 00 01 10 11 // 00 0 4 1 5 @@ -5006,7 +4957,7 @@ u64 get_us(void) return (tv.tv_sec * 1000000ULL) + tv.tv_usec; } -#ifdef NEON_BUILD +#if 0 //def NEON_BUILD u32 get_counter() {