X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=blobdiff_plain;f=plugins%2Fgpu_neon%2Fpsx_gpu%2Fpsx_gpu_arm_neon.S;h=6108bc35b26d12951b457f9c55776b0bd17f2015;hp=381f3a9ead3319978b7b7829c3ad218a5afc7aeb;hb=5d834c089ea695dba7643cba8686ce2ac06d8db4;hpb=75e28f62b2a50044b58075d63d207409e0148409 diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S index 381f3a9e..6108bc35 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S @@ -38,10 +38,10 @@ #define psx_gpu_dirty_textures_8bpp_mask_offset 172 #define psx_gpu_dirty_textures_8bpp_alternate_mask_offset 176 #define psx_gpu_triangle_color_offset 180 -#define psx_gpu_primitive_color_offset 184 -#define psx_gpu_dither_table_offset 188 -#define psx_gpu_render_block_handler_offset 204 -#define psx_gpu_texture_page_ptr_offset 208 +#define psx_gpu_dither_table_offset 184 +#define psx_gpu_render_block_handler_offset 200 +#define psx_gpu_texture_page_ptr_offset 204 +#define psx_gpu_texture_page_base_offset 208 #define psx_gpu_clut_ptr_offset 212 #define psx_gpu_vram_ptr_offset 216 @@ -76,8 +76,6 @@ #define psx_gpu_span_edge_data_offset 0x4100 #define psx_gpu_span_b_offset_offset 0x5100 -#define psx_gpu__vram_offset 0x005900 - #define edge_data_left_x_offset 0 #define edge_data_num_blocks_offset 2 #define edge_data_right_mask_offset 4 @@ -243,6 +241,18 @@ .align 4 +/* FIXME: users of this should be in psx_gpu instead */ +#ifndef __PIC__ +#define load_pointer(register, pointer) \ + movw register, :lower16:pointer; \ + movt register, :upper16:pointer; \ + +#else +#define load_pointer(register, pointer) \ + ldr register, =pointer \ + +#endif + #define function(name) \ .global name; \ name: \ @@ -611,8 +621,7 @@ function(compute_all_gradients) vld1.32 { uvrg }, [ temp ]; \ add temp, psx_gpu, #psx_gpu_uvrg_dy_offset; \ vld1.32 { uvrg_dy }, [ temp ]; \ - movw reciprocal_table_ptr, :lower16:reciprocal_table; \ - movt reciprocal_table_ptr, :upper16:reciprocal_table; \ + load_pointer(reciprocal_table_ptr, reciprocal_table); \ \ vmov.u32 c_0x01, #0x01 \ @@ -1018,6 +1027,7 @@ function(setup_spans_up_left) function(setup_spans_up_right) setup_spans_up_up(right, left) +.pool #define setup_spans_down_down(minor, major) \ setup_spans_prologue(); \ @@ -1226,6 +1236,7 @@ function(setup_spans_up_down) setup_spans_prologue_b() bal 4b +.pool #undef span_uvrg_offset #undef span_edge_data @@ -1957,6 +1968,8 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_direct) vdup.u16 colors, color add span_edge_data, psx_gpu, #psx_gpu_span_edge_data_offset + orr color, color, lsl #16 + 0: ldrh span_num_blocks, [ span_edge_data, #edge_data_num_blocks_offset ] @@ -1983,22 +1996,32 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_direct) 3: ldrb right_mask, [ span_edge_data, #edge_data_right_mask_offset ] - eor right_mask, right_mask, #0xFF - 4: - strh color, [ fb_ptr ], #2 - movs right_mask, right_mask, lsr #1 - bne 4b + cmp right_mask, #0x0 + beq 5f + + tst right_mask, #0xF + streq color, [ fb_ptr ], #4 + moveq right_mask, right_mask, lsr #4 + streq color, [ fb_ptr ], #4 + + tst right_mask, #0x3 + streq color, [ fb_ptr ], #4 + moveq right_mask, right_mask, lsr #2 + + tst right_mask, #0x1 + streqh color, [ fb_ptr ] 1: add span_edge_data, span_edge_data, #8 subs num_spans, num_spans, #1 - - strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ] bne 0b ldmia sp!, { r4 - r11, pc } + 5: + vst1.u32 { colors }, [ fb_ptr ] + bal 1b #undef c_64 @@ -2339,6 +2362,7 @@ setup_blocks_shaded_untextured_indirect_builder(dithered) #define draw_mask q0 #define pixels_low d16 +#define pixels_high d17 @@ -2502,23 +2526,67 @@ function(setup_blocks_shaded_untextured_##dithering##_unswizzled_direct) \ 3: \ setup_blocks_shaded_untextured_dither_a_##dithering(); \ \ - ldrb right_mask, [ span_edge_data, #edge_data_right_mask_offset ]; \ + ldrh right_mask, [ span_edge_data, #edge_data_right_mask_offset ]; \ setup_blocks_shaded_untextured_dither_b_##dithering(); \ \ vshr.u8 r_whole_8, r_whole_8, #3; \ + rbit right_mask, right_mask; \ vmov pixels, msb_mask; \ vbic.u8 gb_whole_8, gb_whole_8, d128_0x7; \ - eor right_mask, right_mask, #0xFF; \ + clz right_mask, right_mask; \ \ vmlal.u8 pixels, r_whole_8, d64_1; \ vmlal.u8 pixels, g_whole_8, d64_4; \ vmlal.u8 pixels, b_whole_8, d64_128; \ \ + ldr pc, [ pc, right_mask, lsl #2 ]; \ + nop; \ + nop; \ + .word 4f; \ + .word 5f; \ + .word 6f; \ + .word 7f; \ + .word 8f; \ + .word 9f; \ + .word 10f; \ + .word 11f; \ + \ 4: \ - vst1.u16 { pixels_low[0] }, [ fb_ptr ]!; \ - vext.16 pixels, pixels, #1; \ - movs right_mask, right_mask, lsr #1; \ - bne 4b; \ + vst1.u16 { pixels_low[0] }, [ fb_ptr ]; \ + bal 1f; \ + \ + 5: \ + vst1.u32 { pixels_low[0] }, [ fb_ptr ]; \ + bal 1f; \ + \ + 6: \ + vst1.u32 { pixels_low[0] }, [ fb_ptr ]!; \ + vst1.u16 { pixels_low[2] }, [ fb_ptr ]; \ + bal 1f; \ + \ + 7: \ + vst1.u32 { pixels_low }, [ fb_ptr ]; \ + bal 1f; \ + \ + 8: \ + vst1.u32 { pixels_low }, [ fb_ptr ]!; \ + vst1.u16 { pixels_high[0] }, [ fb_ptr ]; \ + bal 1f; \ + \ + 9: \ + vst1.u32 { pixels_low }, [ fb_ptr ]!; \ + vst1.u32 { pixels_high[0] }, [ fb_ptr ]!; \ + bal 1f; \ + \ + 10: \ + vst1.u32 { pixels_low }, [ fb_ptr ]!; \ + vst1.u32 { pixels_high[0] }, [ fb_ptr ]!; \ + vst1.u16 { pixels_high[2] }, [ fb_ptr ]; \ + bal 1f; \ + \ + 11: \ + vst1.u32 { pixels }, [ fb_ptr ]; \ + bal 1f; \ \ 1: \ add span_uvrg_offset, span_uvrg_offset, #16; \ @@ -2959,6 +3027,8 @@ function(texture_blocks_16bpp) #define psx_gpu r0 #define num_blocks r1 #define color_ptr r2 +#define colors_scalar r2 +#define colors_scalar_compare r3 #define mask_msb_ptr r2 #define block_ptr_load_a r0 @@ -3015,9 +3085,21 @@ function(texture_blocks_16bpp) add mask_msb_ptr, psx_gpu, #psx_gpu_mask_msb_offset; \ vld1.u16 { msb_mask_low[], msb_mask_high[] }, [ mask_msb_ptr, :16 ] \ -#define shade_blocks_textured_modulated_prologue_shaded() \ -#define shade_blocks_textured_modulated_prologue_unshaded() \ +#define shade_blocks_textured_modulated_prologue_shaded(dithering, target) \ + +#define shade_blocks_textured_false_modulation_check_undithered(target) \ + ldr colors_scalar, [ psx_gpu, #psx_gpu_triangle_color_offset ]; \ + movw colors_scalar_compare, #0x8080; \ + \ + movt colors_scalar_compare, #0x80; \ + cmp colors_scalar, colors_scalar_compare; \ + beq shade_blocks_textured_unmodulated_##target \ + +#define shade_blocks_textured_false_modulation_check_dithered(target) \ + +#define shade_blocks_textured_modulated_prologue_unshaded(dithering, target) \ + shade_blocks_textured_false_modulation_check_##dithering(target); \ add color_ptr, psx_gpu, #psx_gpu_triangle_color_offset; \ vld1.u32 { colors_r[] }, [ color_ptr, :32 ]; \ vdup.u8 colors_g, colors_r[1]; \ @@ -3088,13 +3170,13 @@ function(texture_blocks_16bpp) .align 3; \ \ function(shade_blocks_##shading##_textured_modulated_##dithering##_##target) \ + shade_blocks_textured_modulated_prologue_##shading(dithering, target); \ stmdb sp!, { r4 - r5, lr }; \ ldrh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ]; \ \ vld1.u32 { test_mask }, [ psx_gpu, :128 ]; \ \ shade_blocks_textured_modulated_prologue_##target(); \ - shade_blocks_textured_modulated_prologue_##shading(); \ \ add block_ptr_load_a, psx_gpu, #psx_gpu_blocks_offset; \ mov c_32, #32; \ @@ -4268,102 +4350,52 @@ function(warmup) bx lr +#undef vram_ptr #undef color -#undef y +#undef width #undef height - -#define psx_gpu r0 -#define color r1 -#define x r2 -#define y r3 +#undef pitch #define vram_ptr r0 -#define width r3 -#define height r12 - -#define parameter_width_offset 0 -#define parameter_height_offset 4 +#define color r1 +#define width r2 +#define height r3 -#define color_r r14 -#define color_g r4 -#define color_b r5 +#define pitch r1 -#define left_unaligned r14 -#define right_unaligned r4 -#define pitch r5 -#define num_unaligned r2 -#define num_width r6 +#define num_width r12 -#undef colors +#undef colors_a +#undef colors_b -#define colors q0 +#define colors_a q0 +#define colors_b q1 .align 3 function(render_block_fill_body) - ldr vram_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ] - ldr height, [ sp, #parameter_height_offset ] - - add vram_ptr, vram_ptr, y, lsl #11 - ldr width, [ sp, #parameter_width_offset ] - - add vram_ptr, vram_ptr, x, lsl #1 - stmdb sp!, { r4 - r6, r14 } - - ubfx color_r, color, #3, #5 - ubfx color_g, color, #11, #5 - - ubfx color_b, color, #19, #5 - orr color, color_r, color_g, lsl #5 - - orr color, color, color_b, lsl #10 - add left_unaligned, x, #0x7 - - bic left_unaligned, left_unaligned, #0x7 - vdup.u16 colors, color - - sub left_unaligned, left_unaligned, x + vdup.u16 colors_a, color mov pitch, #2048 + vmov colors_b, colors_a sub pitch, pitch, width, lsl #1 - sub width, width, left_unaligned - - and right_unaligned, width, #0x7 - bic width, width, #0x7 - - 0: - mov num_width, width, lsr #3 - - movs num_unaligned, left_unaligned - beq 2f - 1: - strh color, [ vram_ptr ], #2 - - subs num_unaligned, num_unaligned, #1 - bne 1b - - 2: - vst1.u32 { colors }, [ vram_ptr, :128 ]! - subs num_width, num_width, #1 - bne 2b + mov num_width, width - movs num_unaligned, right_unaligned - beq 4f + 0: + vst1.u32 { colors_a, colors_b }, [ vram_ptr, :256 ]! - 3: - strh color, [ vram_ptr ], #2 - - subs num_unaligned, num_unaligned, #1 - bne 3b + subs num_width, num_width, #2 + bne 0b - 4: add vram_ptr, vram_ptr, pitch + mov num_width, width + subs height, height, #1 bne 0b - - ldmia sp!, { r4 - r6, pc } + bx lr + #undef x #undef y @@ -5124,7 +5156,7 @@ function(setup_sprite_16bpp) ldrh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ] add block, psx_gpu, #psx_gpu_blocks_offset - bic texture_offset_base, texture_offset_base, #0x7 + bic texture_offset_base, texture_offset_base, #0xF cmp block_width, #1 ldr texture_page_ptr, [ psx_gpu, #psx_gpu_texture_page_ptr_offset ] @@ -5273,7 +5305,7 @@ function(update_texture_4bpp_cache) ldrb current_texture_page, [ psx_gpu, #psx_gpu_current_texture_page_offset ] - ldr texture_page_ptr, [ psx_gpu, #psx_gpu_texture_page_ptr_offset ] + ldr texture_page_ptr, [ psx_gpu, #psx_gpu_texture_page_base_offset ] ldr vram_ptr_a, [ psx_gpu, #psx_gpu_vram_ptr_offset ] and current_texture_page_x, current_texture_page, #0xF @@ -5377,7 +5409,7 @@ function(update_texture_8bpp_cache_slice) ldrb current_texture_page, [ psx_gpu, #psx_gpu_current_texture_page_offset ] ldr vram_ptr_a, [ psx_gpu, #psx_gpu_vram_ptr_offset ] - ldr texture_page_ptr, [ psx_gpu, #psx_gpu_texture_page_ptr_offset ] + ldr texture_page_ptr, [ psx_gpu, #psx_gpu_texture_page_base_offset ] mov tile_y, #16 and texture_page_x, texture_page, #0xF