X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=blobdiff_plain;f=plugins%2Fgpu_neon%2Fpsx_gpu%2Fpsx_gpu.c;h=9b5a64d730ef0d10bcdfa005238a0757b496a5cb;hp=2acfedc6d5a18e0e6d34257b4d35e7ebd37e2ede;hb=3b3dee71d84bbbb376548d794b7a11cd38833cf0;hpb=3867c6efed8d1cd6cd40f07cd46876f59da8912f diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index 2acfedc6..9b5a64d7 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -47,7 +47,8 @@ u32 zero_block_spans = 0; u32 texture_cache_loads = 0; u32 false_modulated_blocks = 0; -u32 reciprocal_table[512]; +/* double size for enhancement */ +u32 reciprocal_table[512 * 2]; typedef s32 fixed_type; @@ -244,6 +245,58 @@ u32 invalidate_texture_cache_region_viewport(psx_gpu_struct *psx_gpu, u32 x1, return mask; } +void update_texture_cache_region(psx_gpu_struct *psx_gpu, u32 x1, u32 y1, + u32 x2, u32 y2) +{ + u32 mask = texture_region_mask(x1, y1, x2, y2); + u32 texture_page; + u8 *texture_page_ptr; + u16 *vram_ptr; + u32 texel_block; + u32 sub_x, sub_y; + + psx_gpu->dirty_textures_8bpp_mask |= mask; + psx_gpu->dirty_textures_8bpp_alternate_mask |= mask; + + if ((psx_gpu->dirty_textures_4bpp_mask & mask) == 0 && + (x1 & 3) == 0 && (y1 & 15) == 0 && x2 - x1 < 4 && y2 - y1 < 16) + { + texture_page = ((x1 / 64) & 15) + (y1 / 256) * 16; + texture_page_ptr = psx_gpu->texture_4bpp_cache[texture_page]; + texture_page_ptr += (x1 / 4 & 15) * 16*16 + (y1 / 16 & 15) * 16*16*16; + vram_ptr = psx_gpu->vram_ptr + x1 + y1 * 1024; + sub_x = 4; + sub_y = 16; + + while(sub_y) + { + while(sub_x) + { + texel_block = *vram_ptr; + + texture_page_ptr[0] = texel_block & 0xF; + texture_page_ptr[1] = (texel_block >> 4) & 0xF; + texture_page_ptr[2] = (texel_block >> 8) & 0xF; + texture_page_ptr[3] = texel_block >> 12; + + vram_ptr++; + texture_page_ptr += 4; + + sub_x--; + } + + vram_ptr -= 4; + sub_x = 4; + + sub_y--; + vram_ptr += 1024; + } + } + else + { + psx_gpu->dirty_textures_4bpp_mask |= mask; + } +} void update_texture_8bpp_cache_slice(psx_gpu_struct *psx_gpu, u32 texture_page); @@ -281,6 +334,7 @@ void update_texture_4bpp_cache(psx_gpu_struct *psx_gpu) while(sub_x) { texel_block = *vram_ptr; + texture_page_ptr[0] = texel_block & 0xF; texture_page_ptr[1] = (texel_block >> 4) & 0xF; texture_page_ptr[2] = (texel_block >> 8) & 0xF; @@ -400,6 +454,48 @@ void setup_blocks_shaded_untextured_undithered_unswizzled_indirect( void flush_render_block_buffer(psx_gpu_struct *psx_gpu) { + if((psx_gpu->render_mode & RENDER_INTERLACE_ENABLED) && + (psx_gpu->primitive_type == PRIMITIVE_TYPE_SPRITE)) + { + u32 num_blocks_dest = 0; + block_struct *block_src = psx_gpu->blocks; + block_struct *block_dest = psx_gpu->blocks; + + u16 *vram_ptr = psx_gpu->vram_ptr; + u32 i; + + if(psx_gpu->render_mode & RENDER_INTERLACE_ODD) + { + for(i = 0; i < psx_gpu->num_blocks; i++) + { + u32 fb_offset = (u32)((u8 *)block_src->fb_ptr - (u8 *)vram_ptr); + if(fb_offset & (1 << 11)) + { + *block_dest = *block_src; + num_blocks_dest++; + block_dest++; + } + block_src++; + } + } + else + { + for(i = 0; i < psx_gpu->num_blocks; i++) + { + u32 fb_offset = (u32)((u8 *)block_src->fb_ptr - (u8 *)vram_ptr); + if((fb_offset & (1 << 11)) == 0) + { + *block_dest = *block_src; + num_blocks_dest++; + block_dest++; + } + block_src++; + } + } + + psx_gpu->num_blocks = num_blocks_dest; + } + if(psx_gpu->num_blocks) { render_block_handler_struct *render_block_handler = @@ -471,7 +567,7 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, vec_4x32u uvrg_base; vec_4x32u b_base; - vec_4x32u const_0x8000; + vec_4x32u uvrgb_phase; vec_4x16s d0_a_d3_c, d0_b, d0_c; vec_4x16s d1_a, d1_b, d1_c_d2_a; @@ -500,12 +596,12 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, setup_gradient_calculation_input(1, b); setup_gradient_calculation_input(2, c); - dup_4x32b(const_0x8000, 0x8000); + dup_4x32b(uvrgb_phase, psx_gpu->uvrgb_phase); shl_long_4x16b(uvrg_base, x0_a_y0_c, 16); shl_long_4x16b(b_base, x0_b, 16); - add_4x32b(uvrg_base, uvrg_base, const_0x8000); - add_4x32b(b_base, b_base, const_0x8000); + add_4x32b(uvrg_base, uvrg_base, uvrgb_phase); + add_4x32b(b_base, b_base, uvrgb_phase); // Can probably pair these, but it'll require careful register allocation sub_4x16b(d0_a_d3_c, x1_a_y1_c, x0_a_y0_c); @@ -759,7 +855,7 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, \ dup_2x32b(edge_shifts, edge_shift); \ sub_2x32b(heights_b, heights, c_0x01); \ - shr_2x32b(height_reciprocals, edge_shifts, 12); \ + shr_2x32b(height_reciprocals, edge_shifts, 10); \ \ mla_2x32b(heights_b, x_starts, heights); \ bic_immediate_4x16b(vector_cast(vec_4x16u, edge_shifts), 0xE0); \ @@ -788,8 +884,8 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, sub_2x32b(widths, x_ends, x_starts); \ width_alt = x_c - start_c; \ \ - shr_2x32b(height_reciprocals, edge_shifts, 12); \ - height_reciprocal_alt = edge_shift_alt >> 12; \ + shr_2x32b(height_reciprocals, edge_shifts, 10); \ + height_reciprocal_alt = edge_shift_alt >> 10; \ \ bic_immediate_4x16b(vector_cast(vec_4x16u, edge_shifts), 0xE0); \ edge_shift_alt &= 0x1F; \ @@ -1777,7 +1873,7 @@ void setup_blocks_##shading##_##texturing##_##dithering##_##sw##_##target( \ if(span_num_blocks) \ { \ y = span_edge_data->y; \ - fb_ptr = psx_gpu->vram_ptr + span_edge_data->left_x + (y * 1024); \ + fb_ptr = psx_gpu->vram_out_ptr + span_edge_data->left_x + (y * 1024); \ \ setup_blocks_span_initialize_##shading##_##texturing(); \ setup_blocks_span_initialize_##dithering(texturing); \ @@ -2017,6 +2113,10 @@ void texture_blocks_16bpp(psx_gpu_struct *psx_gpu) #define shade_blocks_textured_false_modulated_check_dithered(target) \ + if(psx_gpu->triangle_color == 0x808080) \ + { \ + false_modulated_blocks += num_blocks; \ + } \ #define shade_blocks_textured_false_modulated_check_undithered(target) \ if(psx_gpu->triangle_color == 0x808080) \ @@ -2265,7 +2365,7 @@ void shade_blocks_unshaded_untextured_indirect(psx_gpu_struct *psx_gpu); void shade_blocks_unshaded_untextured_direct(psx_gpu_struct *psx_gpu); #ifndef NEON_BUILD - + void shade_blocks_unshaded_untextured_indirect(psx_gpu_struct *psx_gpu) { } @@ -2806,8 +2906,8 @@ char *render_block_flag_strings[] = (triangle_y_direction_##direction_c << 4) | \ (triangle_winding_##winding << 6)) \ -void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, - u32 flags) +static int prepare_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, + vertex_struct *vertexes_out[3]) { s32 y_top, y_bottom; s32 triangle_area; @@ -2828,7 +2928,7 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, #ifdef PROFILE trivial_rejects++; #endif - return; + return 0; } if(b->y < a->y) @@ -2850,7 +2950,7 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, #ifdef PROFILE trivial_rejects++; #endif - return; + return 0; } if(triangle_area < 0) @@ -2871,12 +2971,12 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, vertex_swap(a, b); } - if((c->x - a->x) >= 1024) + if((c->x - psx_gpu->offset_x) >= 1024 || (c->x - a->x) >= 1024) { #ifdef PROFILE trivial_rejects++; #endif - return; + return 0; } if(invalidate_texture_cache_region_viewport(psx_gpu, a->x, y_top, c->x, @@ -2885,13 +2985,28 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, #ifdef PROFILE trivial_rejects++; #endif - return; + return 0; } - psx_gpu->num_spans = 0; psx_gpu->triangle_area = triangle_area; psx_gpu->triangle_winding = triangle_winding; + vertexes_out[0] = a; + vertexes_out[1] = b; + vertexes_out[2] = c; + + return 1; +} + +static void render_triangle_p(psx_gpu_struct *psx_gpu, + vertex_struct *vertex_ptrs[3], u32 flags) +{ + psx_gpu->num_spans = 0; + + vertex_struct *a = vertex_ptrs[0]; + vertex_struct *b = vertex_ptrs[1]; + vertex_struct *c = vertex_ptrs[2]; + s32 y_delta_a = b->y - a->y; s32 y_delta_b = c->y - b->y; s32 y_delta_c = c->y - a->y; @@ -2903,7 +3018,7 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, compute_all_gradients(psx_gpu, a, b, c); switch(y_direction_a | (y_direction_b << 2) | (y_direction_c << 4) | - (triangle_winding << 6)) + (psx_gpu->triangle_winding << 6)) { triangle_case(up, up, up, negative): triangle_case(up, up, flat, negative): @@ -2982,6 +3097,28 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, spans += psx_gpu->num_spans; #endif + if(unlikely(psx_gpu->render_mode & RENDER_INTERLACE_ENABLED)) + { + u32 i; + + if(psx_gpu->render_mode & RENDER_INTERLACE_ODD) + { + for(i = 0; i < psx_gpu->num_spans; i++) + { + if((psx_gpu->span_edge_data[i].y & 1) == 0) + psx_gpu->span_edge_data[i].num_blocks = 0; + } + } + else + { + for(i = 0; i < psx_gpu->num_spans; i++) + { + if(psx_gpu->span_edge_data[i].y & 1) + psx_gpu->span_edge_data[i].num_blocks = 0; + } + } + } + u32 render_state = flags & (RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND | RENDER_FLAGS_TEXTURE_MAP | RENDER_FLAGS_SHADE); @@ -3005,6 +3142,14 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, (psx_gpu); } +void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, + u32 flags) +{ + vertex_struct *vertex_ptrs[3]; + if (prepare_triangle(psx_gpu, vertexes, vertex_ptrs)) + render_triangle_p(psx_gpu, vertex_ptrs, flags); +} + void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu); @@ -3762,7 +3907,7 @@ void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, RENDER_FLAGS_TEXTURE_MAP); render_state |= (psx_gpu->render_state_base & ~RENDER_STATE_DITHER); - + if((psx_gpu->render_state != render_state) || (psx_gpu->primitive_type != PRIMITIVE_TYPE_SPRITE)) { @@ -4034,9 +4179,6 @@ do \ { \ delta_y *= -1; \ \ - if(delta_y >= 512) \ - return; \ - \ if(delta_x > delta_y) \ { \ draw_line_span_horizontal(decrement, shading, blending, dithering, \ @@ -4050,9 +4192,6 @@ do \ } \ else \ { \ - if(delta_y >= 512) \ - return; \ - \ if(delta_x > delta_y) \ { \ draw_line_span_horizontal(increment, shading, blending, dithering, \ @@ -4067,7 +4206,7 @@ do \ void render_line(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 flags, - u32 color) + u32 color, int double_resolution) { s32 color_r, color_g, color_b; u32 triangle_winding = 0; @@ -4119,12 +4258,22 @@ void render_line(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 flags, delta_x = x_b - x_a; delta_y = y_b - y_a; - if(delta_x >= 1024) + if(delta_x >= 1024 || delta_y >= 512 || delta_y <= -512) return; + if(double_resolution) + { + x_a *= 2; + x_b *= 2; + y_a *= 2; + y_b *= 2; + delta_x *= 2; + delta_y *= 2; + } + flags &= ~RENDER_FLAGS_TEXTURE_MAP; - vram_ptr = psx_gpu->vram_ptr + (y_a * 1024) + x_a; + vram_ptr = psx_gpu->vram_out_ptr + (y_a * 1024) + x_a; control_mask = 0x0; @@ -4311,32 +4460,97 @@ void render_line(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 flags, void render_block_fill(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y, u32 width, u32 height) { + if((width == 0) || (height == 0)) + return; + invalidate_texture_cache_region(psx_gpu, x, y, x + width - 1, y + height - 1); -#ifndef NEON_BUILD u32 r = color & 0xFF; u32 g = (color >> 8) & 0xFF; u32 b = (color >> 16) & 0xFF; - u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10); + u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10) | + psx_gpu->mask_msb; + u32 color_32bpp = color_16bpp | (color_16bpp << 16); - u16 *vram_ptr = psx_gpu->vram_ptr + x + (y * 1024); - u32 draw_x, draw_y; + u32 *vram_ptr = (u32 *)(psx_gpu->vram_out_ptr + x + (y * 1024)); - for(draw_y = 0; draw_y < height; draw_y++) + u32 pitch = 512 - (width / 2); + u32 num_width; + + if(psx_gpu->render_mode & RENDER_INTERLACE_ENABLED) { - for(draw_x = 0; draw_x < width; draw_x++) + pitch += 512; + height /= 2; + + if(psx_gpu->render_mode & RENDER_INTERLACE_ODD) + vram_ptr += 512; + } + + while(height) + { + num_width = width; + while(num_width) { - vram_ptr[draw_x] = color_16bpp; + vram_ptr[0] = color_32bpp; + vram_ptr[1] = color_32bpp; + vram_ptr[2] = color_32bpp; + vram_ptr[3] = color_32bpp; + vram_ptr[4] = color_32bpp; + vram_ptr[5] = color_32bpp; + vram_ptr[6] = color_32bpp; + vram_ptr[7] = color_32bpp; + + vram_ptr += 8; + num_width -= 16; } - vram_ptr += 1024; + vram_ptr += pitch; + height--; } -#else - void render_block_fill_body(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y, - u32 width, u32 height); +} - render_block_fill_body(psx_gpu, color, x, y, width, height); -#endif +void render_block_fill_enh(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y, + u32 width, u32 height) +{ + if((width == 0) || (height == 0)) + return; + + if(width > 1024) + width = 1024; + + u32 r = color & 0xFF; + u32 g = (color >> 8) & 0xFF; + u32 b = (color >> 16) & 0xFF; + u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10) | + psx_gpu->mask_msb; + u32 color_32bpp = color_16bpp | (color_16bpp << 16); + + u32 *vram_ptr = (u32 *)(psx_gpu->vram_out_ptr + x + (y * 1024)); + + u32 pitch = 1024 / 2 - (width / 2); + u32 num_width; + + while(height) + { + num_width = width; + while(num_width) + { + vram_ptr[0] = color_32bpp; + vram_ptr[1] = color_32bpp; + vram_ptr[2] = color_32bpp; + vram_ptr[3] = color_32bpp; + vram_ptr[4] = color_32bpp; + vram_ptr[5] = color_32bpp; + vram_ptr[6] = color_32bpp; + vram_ptr[7] = color_32bpp; + + vram_ptr += 8; + num_width -= 16; + } + + vram_ptr += pitch; + height--; + } } void render_block_copy(psx_gpu_struct *psx_gpu, u16 *source, u32 x, u32 y, @@ -4344,14 +4558,19 @@ void render_block_copy(psx_gpu_struct *psx_gpu, u16 *source, u32 x, u32 y, { u16 *vram_ptr = psx_gpu->vram_ptr + x + (y * 1024); u32 draw_x, draw_y; + u32 mask_msb = psx_gpu->mask_msb; + + if((width == 0) || (height == 0)) + return; + flush_render_block_buffer(psx_gpu); invalidate_texture_cache_region(psx_gpu, x, y, x + width - 1, y + height - 1); for(draw_y = 0; draw_y < height; draw_y++) { for(draw_x = 0; draw_x < width; draw_x++) { - vram_ptr[draw_x] = source[draw_x]; + vram_ptr[draw_x] = source[draw_x] | mask_msb; } source += pitch; @@ -4374,16 +4593,17 @@ void initialize_reciprocal_table(void) u32 height_reciprocal; s32 shift; - for(height = 1; height < 512; height++) + for(height = 1; height < sizeof(reciprocal_table) + / sizeof(reciprocal_table[0]); height++) { shift = __builtin_clz(height); height_normalized = height << shift; - height_reciprocal = ((1ULL << 50) + (height_normalized - 1)) / + height_reciprocal = ((1ULL << 52) + (height_normalized - 1)) / height_normalized; - shift = 32 - (50 - shift); + shift = 32 - (52 - shift); - reciprocal_table[height] = (height_reciprocal << 12) | shift; + reciprocal_table[height] = (height_reciprocal << 10) | shift; } } @@ -4411,8 +4631,10 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram) psx_gpu->render_state = 0; psx_gpu->render_state_base = 0; psx_gpu->num_blocks = 0; + psx_gpu->uvrgb_phase = 0x8000; psx_gpu->vram_ptr = vram; + psx_gpu->vram_out_ptr = vram; psx_gpu->texture_page_base = psx_gpu->vram_ptr; psx_gpu->texture_page_ptr = psx_gpu->vram_ptr; @@ -4420,6 +4642,13 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram) psx_gpu->mask_msb = 0; + psx_gpu->texture_window_x = 0; + psx_gpu->texture_window_y = 0; + psx_gpu->texture_mask_width = 0xFF; + psx_gpu->texture_mask_height = 0xFF; + + psx_gpu->render_mode = 0; + memset(psx_gpu->vram_ptr, 0, sizeof(u16) * 1024 * 512); initialize_reciprocal_table(); @@ -4441,6 +4670,8 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram) psx_gpu->dither_table[3] = dither_table_row(3, -1, 2, -2); psx_gpu->primitive_type = PRIMITIVE_TYPE_UNKNOWN; + + psx_gpu->enhancement_x_threshold = 256; } u64 get_us(void)