X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=blobdiff_plain;f=plugins%2Fgpu_neon%2Fpsx_gpu%2Fpsx_gpu.c;h=092125b0412ad0cd557083412ea79b3ec6e1cd23;hp=9e84acea9d383b0213d97815707e1266d5856f3a;hb=e929dec505f8d3692248fe0d42c84a37c994ad39;hpb=69b09c0d33efd71ebe4886cfae41c162803683d5 diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index 9e84acea..092125b0 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -47,7 +47,8 @@ u32 zero_block_spans = 0; u32 texture_cache_loads = 0; u32 false_modulated_blocks = 0; -u32 reciprocal_table[512]; +/* double size for enhancement */ +u32 reciprocal_table[512 * 2]; typedef s32 fixed_type; @@ -244,6 +245,58 @@ u32 invalidate_texture_cache_region_viewport(psx_gpu_struct *psx_gpu, u32 x1, return mask; } +void update_texture_cache_region(psx_gpu_struct *psx_gpu, u32 x1, u32 y1, + u32 x2, u32 y2) +{ + u32 mask = texture_region_mask(x1, y1, x2, y2); + u32 texture_page; + u8 *texture_page_ptr; + u16 *vram_ptr; + u32 texel_block; + u32 sub_x, sub_y; + + psx_gpu->dirty_textures_8bpp_mask |= mask; + psx_gpu->dirty_textures_8bpp_alternate_mask |= mask; + + if ((psx_gpu->dirty_textures_4bpp_mask & mask) == 0 && + (x1 & 3) == 0 && (y1 & 15) == 0 && x2 - x1 < 4 && y2 - y1 < 16) + { + texture_page = ((x1 / 64) & 15) + (y1 / 256) * 16; + texture_page_ptr = psx_gpu->texture_4bpp_cache[texture_page]; + texture_page_ptr += (x1 / 4 & 15) * 16*16 + (y1 / 16 & 15) * 16*16*16; + vram_ptr = psx_gpu->vram_ptr + x1 + y1 * 1024; + sub_x = 4; + sub_y = 16; + + while(sub_y) + { + while(sub_x) + { + texel_block = *vram_ptr; + + texture_page_ptr[0] = texel_block & 0xF; + texture_page_ptr[1] = (texel_block >> 4) & 0xF; + texture_page_ptr[2] = (texel_block >> 8) & 0xF; + texture_page_ptr[3] = texel_block >> 12; + + vram_ptr++; + texture_page_ptr += 4; + + sub_x--; + } + + vram_ptr -= 4; + sub_x = 4; + + sub_y--; + vram_ptr += 1024; + } + } + else + { + psx_gpu->dirty_textures_4bpp_mask |= mask; + } +} void update_texture_8bpp_cache_slice(psx_gpu_struct *psx_gpu, u32 texture_page); @@ -401,7 +454,7 @@ void setup_blocks_shaded_untextured_undithered_unswizzled_indirect( void flush_render_block_buffer(psx_gpu_struct *psx_gpu) { - if((psx_gpu->interlace_mode & RENDER_INTERLACE_ENABLED) && + if((psx_gpu->render_mode & RENDER_INTERLACE_ENABLED) && (psx_gpu->primitive_type == PRIMITIVE_TYPE_SPRITE)) { u32 num_blocks_dest = 0; @@ -411,7 +464,7 @@ void flush_render_block_buffer(psx_gpu_struct *psx_gpu) u16 *vram_ptr = psx_gpu->vram_ptr; u32 i; - if(psx_gpu->interlace_mode & RENDER_INTERLACE_ODD) + if(psx_gpu->render_mode & RENDER_INTERLACE_ODD) { for(i = 0; i < psx_gpu->num_blocks; i++) { @@ -514,7 +567,7 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, vec_4x32u uvrg_base; vec_4x32u b_base; - vec_4x32u const_0x8000; + vec_4x32u uvrgb_phase; vec_4x16s d0_a_d3_c, d0_b, d0_c; vec_4x16s d1_a, d1_b, d1_c_d2_a; @@ -543,12 +596,12 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, setup_gradient_calculation_input(1, b); setup_gradient_calculation_input(2, c); - dup_4x32b(const_0x8000, 0x8000); + dup_4x32b(uvrgb_phase, psx_gpu->uvrgb_phase); shl_long_4x16b(uvrg_base, x0_a_y0_c, 16); shl_long_4x16b(b_base, x0_b, 16); - add_4x32b(uvrg_base, uvrg_base, const_0x8000); - add_4x32b(b_base, b_base, const_0x8000); + add_4x32b(uvrg_base, uvrg_base, uvrgb_phase); + add_4x32b(b_base, b_base, uvrgb_phase); // Can probably pair these, but it'll require careful register allocation sub_4x16b(d0_a_d3_c, x1_a_y1_c, x0_a_y0_c); @@ -802,7 +855,7 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, \ dup_2x32b(edge_shifts, edge_shift); \ sub_2x32b(heights_b, heights, c_0x01); \ - shr_2x32b(height_reciprocals, edge_shifts, 12); \ + shr_2x32b(height_reciprocals, edge_shifts, 10); \ \ mla_2x32b(heights_b, x_starts, heights); \ bic_immediate_4x16b(vector_cast(vec_4x16u, edge_shifts), 0xE0); \ @@ -831,8 +884,8 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, sub_2x32b(widths, x_ends, x_starts); \ width_alt = x_c - start_c; \ \ - shr_2x32b(height_reciprocals, edge_shifts, 12); \ - height_reciprocal_alt = edge_shift_alt >> 12; \ + shr_2x32b(height_reciprocals, edge_shifts, 10); \ + height_reciprocal_alt = edge_shift_alt >> 10; \ \ bic_immediate_4x16b(vector_cast(vec_4x16u, edge_shifts), 0xE0); \ edge_shift_alt &= 0x1F; \ @@ -1820,7 +1873,7 @@ void setup_blocks_##shading##_##texturing##_##dithering##_##sw##_##target( \ if(span_num_blocks) \ { \ y = span_edge_data->y; \ - fb_ptr = psx_gpu->vram_ptr + span_edge_data->left_x + (y * 1024); \ + fb_ptr = psx_gpu->vram_out_ptr + span_edge_data->left_x + (y * 1024); \ \ setup_blocks_span_initialize_##shading##_##texturing(); \ setup_blocks_span_initialize_##dithering(texturing); \ @@ -2853,8 +2906,8 @@ char *render_block_flag_strings[] = (triangle_y_direction_##direction_c << 4) | \ (triangle_winding_##winding << 6)) \ -void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, - u32 flags) +static int prepare_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, + vertex_struct *vertexes_out[3]) { s32 y_top, y_bottom; s32 triangle_area; @@ -2875,7 +2928,7 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, #ifdef PROFILE trivial_rejects++; #endif - return; + return 0; } if(b->y < a->y) @@ -2897,7 +2950,7 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, #ifdef PROFILE trivial_rejects++; #endif - return; + return 0; } if(triangle_area < 0) @@ -2918,12 +2971,12 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, vertex_swap(a, b); } - if((c->x - a->x) >= 1024) + if((c->x - psx_gpu->offset_x) >= 1024 || (c->x - a->x) >= 1024) { #ifdef PROFILE trivial_rejects++; #endif - return; + return 0; } if(invalidate_texture_cache_region_viewport(psx_gpu, a->x, y_top, c->x, @@ -2932,13 +2985,28 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, #ifdef PROFILE trivial_rejects++; #endif - return; + return 0; } - psx_gpu->num_spans = 0; psx_gpu->triangle_area = triangle_area; psx_gpu->triangle_winding = triangle_winding; + vertexes_out[0] = a; + vertexes_out[1] = b; + vertexes_out[2] = c; + + return 1; +} + +static void render_triangle_p(psx_gpu_struct *psx_gpu, + vertex_struct *vertex_ptrs[3], u32 flags) +{ + psx_gpu->num_spans = 0; + + vertex_struct *a = vertex_ptrs[0]; + vertex_struct *b = vertex_ptrs[1]; + vertex_struct *c = vertex_ptrs[2]; + s32 y_delta_a = b->y - a->y; s32 y_delta_b = c->y - b->y; s32 y_delta_c = c->y - a->y; @@ -2950,7 +3018,7 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, compute_all_gradients(psx_gpu, a, b, c); switch(y_direction_a | (y_direction_b << 2) | (y_direction_c << 4) | - (triangle_winding << 6)) + (psx_gpu->triangle_winding << 6)) { triangle_case(up, up, up, negative): triangle_case(up, up, flat, negative): @@ -3029,11 +3097,11 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, spans += psx_gpu->num_spans; #endif - if(psx_gpu->interlace_mode & RENDER_INTERLACE_ENABLED) + if(unlikely(psx_gpu->render_mode & RENDER_INTERLACE_ENABLED)) { u32 i; - if(psx_gpu->interlace_mode & RENDER_INTERLACE_ODD) + if(psx_gpu->render_mode & RENDER_INTERLACE_ODD) { for(i = 0; i < psx_gpu->num_spans; i++) { @@ -3074,6 +3142,14 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, (psx_gpu); } +void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, + u32 flags) +{ + vertex_struct *vertex_ptrs[3]; + if (prepare_triangle(psx_gpu, vertexes, vertex_ptrs)) + render_triangle_p(psx_gpu, vertex_ptrs, flags); +} + void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu); @@ -4193,7 +4269,7 @@ void render_line(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 flags, flags &= ~RENDER_FLAGS_TEXTURE_MAP; - vram_ptr = psx_gpu->vram_ptr + (y_a * 1024) + x_a; + vram_ptr = psx_gpu->vram_out_ptr + (y_a * 1024) + x_a; control_mask = 0x0; @@ -4383,7 +4459,6 @@ void render_block_fill(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y, if((width == 0) || (height == 0)) return; - flush_render_block_buffer(psx_gpu); invalidate_texture_cache_region(psx_gpu, x, y, x + width - 1, y + height - 1); u32 r = color & 0xFF; @@ -4393,17 +4468,17 @@ void render_block_fill(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y, psx_gpu->mask_msb; u32 color_32bpp = color_16bpp | (color_16bpp << 16); - u32 *vram_ptr = (u32 *)(psx_gpu->vram_ptr + x + (y * 1024)); + u32 *vram_ptr = (u32 *)(psx_gpu->vram_out_ptr + x + (y * 1024)); u32 pitch = 512 - (width / 2); u32 num_width; - if(psx_gpu->interlace_mode & RENDER_INTERLACE_ENABLED) + if(psx_gpu->render_mode & RENDER_INTERLACE_ENABLED) { pitch += 512; height /= 2; - if(psx_gpu->interlace_mode & RENDER_INTERLACE_ODD) + if(psx_gpu->render_mode & RENDER_INTERLACE_ODD) vram_ptr += 512; } @@ -4430,6 +4505,50 @@ void render_block_fill(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y, } } +void render_block_fill_enh(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y, + u32 width, u32 height) +{ + if((width == 0) || (height == 0)) + return; + + if(width > 1024) + width = 1024; + + u32 r = color & 0xFF; + u32 g = (color >> 8) & 0xFF; + u32 b = (color >> 16) & 0xFF; + u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10) | + psx_gpu->mask_msb; + u32 color_32bpp = color_16bpp | (color_16bpp << 16); + + u32 *vram_ptr = (u32 *)(psx_gpu->vram_out_ptr + x + (y * 1024)); + + u32 pitch = 1024 / 2 - (width / 2); + u32 num_width; + + while(height) + { + num_width = width; + while(num_width) + { + vram_ptr[0] = color_32bpp; + vram_ptr[1] = color_32bpp; + vram_ptr[2] = color_32bpp; + vram_ptr[3] = color_32bpp; + vram_ptr[4] = color_32bpp; + vram_ptr[5] = color_32bpp; + vram_ptr[6] = color_32bpp; + vram_ptr[7] = color_32bpp; + + vram_ptr += 8; + num_width -= 16; + } + + vram_ptr += pitch; + height--; + } +} + void render_block_copy(psx_gpu_struct *psx_gpu, u16 *source, u32 x, u32 y, u32 width, u32 height, u32 pitch) { @@ -4470,16 +4589,17 @@ void initialize_reciprocal_table(void) u32 height_reciprocal; s32 shift; - for(height = 1; height < 512; height++) + for(height = 1; height < sizeof(reciprocal_table) + / sizeof(reciprocal_table[0]); height++) { shift = __builtin_clz(height); height_normalized = height << shift; - height_reciprocal = ((1ULL << 50) + (height_normalized - 1)) / + height_reciprocal = ((1ULL << 52) + (height_normalized - 1)) / height_normalized; - shift = 32 - (50 - shift); + shift = 32 - (52 - shift); - reciprocal_table[height] = (height_reciprocal << 12) | shift; + reciprocal_table[height] = (height_reciprocal << 10) | shift; } } @@ -4507,8 +4627,10 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram) psx_gpu->render_state = 0; psx_gpu->render_state_base = 0; psx_gpu->num_blocks = 0; + psx_gpu->uvrgb_phase = 0x8000; psx_gpu->vram_ptr = vram; + psx_gpu->vram_out_ptr = vram; psx_gpu->texture_page_base = psx_gpu->vram_ptr; psx_gpu->texture_page_ptr = psx_gpu->vram_ptr; @@ -4521,7 +4643,7 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram) psx_gpu->texture_mask_width = 0xFF; psx_gpu->texture_mask_height = 0xFF; - psx_gpu->interlace_mode = 0; + psx_gpu->render_mode = 0; memset(psx_gpu->vram_ptr, 0, sizeof(u16) * 1024 * 512); @@ -4544,6 +4666,8 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram) psx_gpu->dither_table[3] = dither_table_row(3, -1, 2, -2); psx_gpu->primitive_type = PRIMITIVE_TYPE_UNKNOWN; + + psx_gpu->enhancement_x_threshold = 256; } u64 get_us(void)