X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=plugins%2Fgpu_neon%2Fpsx_gpu%2Fpsx_gpu_arm_neon.S;h=87a14f64276257f1da6a460696783d7b79d97d9a;hb=05e2e0c6e20a335c9ce86d22a2ae1ba0f5bd2865;hp=79d54665081cc80b2441f027e1838594cdac0091;hpb=cb88320b4ddbfd8c1714f9a6cba31543a585a8cd;p=pcsx_rearmed.git diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S index 79d54665..87a14f64 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S @@ -182,6 +182,7 @@ #define uvrg_dx3l d6 #define uvrg_dx3h d7 +#define uvrgb_phase q13 .align 4 @@ -313,11 +314,16 @@ function(compute_all_gradients) vmull.s16 ga_uvrg_y, d0_b, d1_b rsbmi ga_bx, ga_bx, #0 + @ r12 = psx_gpu->uvrgb_phase + ldr r12, [ psx_gpu, #psx_gpu_uvrgb_phase_offset ] + vmlsl.s16 ga_uvrg_y, d2_b, d3_b movs gs_by, ga_by, asr #31 vshr.u64 d0, d30, #22 - mov b_base, b0, lsl #16 + add b_base, r12, b0, lsl #16 + + vdup.u32 uvrgb_phase, r12 rsbmi ga_by, ga_by, #0 vclt.s32 gs_uvrg_x, ga_uvrg_x, #0 @ gs_uvrg_x = ga_uvrg_x < 0 @@ -326,7 +332,6 @@ function(compute_all_gradients) ldrb r12, [ psx_gpu, #psx_gpu_triangle_winding_offset ] vclt.s32 gs_uvrg_y, ga_uvrg_y, #0 @ gs_uvrg_y = ga_uvrg_y < 0 - add b_base, b_base, #0x8000 rsb r12, r12, #0 @ r12 = -(triangle->winding) vdup.u32 w_mask, r12 @ w_mask = { -w, -w, -w, -w } @@ -335,7 +340,7 @@ function(compute_all_gradients) vshll.u16 uvrg_base, uvrg0, #16 @ uvrg_base = uvrg0 << 16 vdup.u32 r_shift, r14 @ r_shift = { shift, shift, shift, shift } - vorr.u32 uvrg_base, #0x8000 + vadd.u32 uvrg_base, uvrgb_phase vabs.s32 ga_uvrg_x, ga_uvrg_x @ ga_uvrg_x = abs(ga_uvrg_x) vmov area_r_s, s0 @ area_r_s = triangle_reciprocal @@ -1337,7 +1342,7 @@ function(setup_blocks_shaded_textured_dithered_##swizzling##_indirect) \ add dither_offset_ptr, psx_gpu, #psx_gpu_dither_table_offset; \ \ ldrh y, [ span_edge_data, #edge_data_y_offset ]; \ - ldr fb_ptr, [ psx_gpu, 
#psx_gpu_vram_ptr_offset ]; \ + ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ]; \ \ cmp span_num_blocks, #0; \ beq 1f; \ @@ -1604,7 +1609,7 @@ function(setup_blocks_unshaded_textured_dithered_##swizzling##_indirect) \ add dither_offset_ptr, psx_gpu, #psx_gpu_dither_table_offset; \ \ ldrh y, [ span_edge_data, #edge_data_y_offset ]; \ - ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ]; \ + ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ]; \ \ cmp span_num_blocks, #0; \ beq 1f; \ @@ -1799,7 +1804,7 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_indirect) ldrh span_num_blocks, [ span_edge_data, #edge_data_num_blocks_offset ] ldrh y, [ span_edge_data, #edge_data_y_offset ] - ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ] + ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ] cmp span_num_blocks, #0 beq 1f @@ -1919,7 +1924,7 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_direct) ldrh span_num_blocks, [ span_edge_data, #edge_data_num_blocks_offset ] ldrh y, [ span_edge_data, #edge_data_y_offset ] - ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ] + ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ] cmp span_num_blocks, #0 beq 1f @@ -2106,7 +2111,7 @@ function(setup_blocks_shaded_untextured_##dithering##_unswizzled_indirect) \ add dither_offset_ptr, psx_gpu, #psx_gpu_dither_table_offset; \ \ ldrh y, [ span_edge_data, #edge_data_y_offset ]; \ - ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ]; \ + ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ]; \ \ cmp span_num_blocks, #0; \ beq 1f; \ @@ -2346,7 +2351,7 @@ function(setup_blocks_shaded_untextured_##dithering##_unswizzled_direct) \ add dither_offset_ptr, psx_gpu, #psx_gpu_dither_table_offset; \ \ ldrh y, [ span_edge_data, #edge_data_y_offset ]; \ - ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ]; \ + ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ]; \ \ cmp span_num_blocks, #0; \ beq 1f; \ @@ -4804,7 +4809,7 @@ function(setup_sprite_##texture_mode) \ 
and offset_u, u, #0xF; \ \ ldr width, [ sp, #40 ]; \ - ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ]; \ + ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ]; \ \ ldr height, [ sp, #44 ]; \ add fb_ptr, fb_ptr, y, lsl #11; \ @@ -5052,7 +5057,7 @@ setup_sprites_16bpp_flush_row: function(setup_sprite_16bpp) stmdb sp!, { r4 - r11, r14 } - ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ] + ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ] ldr v, [ sp, #36 ] add fb_ptr, fb_ptr, y, lsl #11 @@ -5405,3 +5410,38 @@ function(update_texture_8bpp_cache_slice) vpop { q0 - q3 } ldmia sp!, { r4 - r11, pc } + +/* void scale2x_tiles8(void *dst, const void *src, int w8, int h) -- writes every 16-bit source element twice in a row and stores each expanded run at two addresses: the current destination pointer and that pointer plus 2048 bytes (presumably the following output row; confirm the 2048-byte pitch against the caller). r0 = dst, r1 = src, r2 = w8 = number of eight-element groups per source row, r3 = h = number of source rows. Source rows are read 2048 bytes apart and the destination advances 4096 bytes per source row. Every load and store carries a :128 qualifier, so src and dst are expected to be 16-byte aligned. Both loops are tested at the bottom, so one pass of each runs even when w8 or h is zero or negative. Uses r12 and r14 as scratch, overwrites q0-q3, saves and restores r4. */ +function(scale2x_tiles8) + push { r4, r14 } /* keep r4 and the return address */ + + mov r4, r1 /* r4 = start of the current source row */ + add r12, r0, #1024*2 /* r12 = second store pointer, 2048 bytes after r0 */ + mov r14, r2 /* r14 = groups left in this row */ + +0: + vld1.u16 { q0 }, [ r1, :128 ]! /* first group of eight elements, r1 += 16 */ + vld1.u16 { q2 }, [ r1, :128 ]! /* second group, loaded unconditionally. NOTE(review): when only one group is left this reads 16 bytes beyond it and the loaded data is never stored */ + vmov q1, q0 + vmov q3, q2 + vzip.16 q0, q1 /* interleave the register with its copy: q0,q1 = a0 a0 a1 a1 ... a7 a7 */ + vzip.16 q2, q3 /* same for the second group */ + subs r14, #2 /* two groups per pass; the flags set here feed blt and bgt below, the stores in between do not change them */ + vst1.u16 { q0, q1 }, [ r0, :128 ]! /* 32 bytes of doubled elements to the first address */ + vst1.u16 { q0, q1 }, [ r12, :128 ]! /* identical 32 bytes to the second address */ + blt 1f /* count went negative: only one group was left, skip the second store pair */ + vst1.u16 { q2, q3 }, [ r0, :128 ]! + vst1.u16 { q2, q3 }, [ r12, :128 ]! + bgt 0b /* more groups left in this row */ +1: + subs r3, #1 /* one source row finished; these flags feed the final bgt, nothing below sets flags */ + mov r14, r2 /* reload the per-row group count */ + add r0, #1024*2*2 /* step the destination forward by 4096 bytes ... */ + add r4, #1024*2 /* next source row starts 2048 bytes later */ + sub r0, r2, lsl #4+1 /* ... and back by w8 << 5 = w8 * 32, the number of bytes stored through r0 for this row */ + mov r1, r4 /* restart the read pointer at the new source row */ + add r12, r0, #1024*2 /* second store pointer again 2048 bytes after r0 */ + bgt 0b /* loop while rows remain */ + nop + + pop { r4, pc } /* restore r4 and return */