X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=blobdiff_plain;f=plugins%2Fgpu_neon%2Fpsx_gpu%2Fpsx_gpu_arm_neon.S;h=ffbea043c2cb64ddde846414f0ea62390ebf7d19;hp=da47756efcaea45cd3704fd5338e88c3895daa75;hb=refs%2Fheads%2Fmaster;hpb=26e3e2aa7525fd4e63e64192dfbb68950e0e4c5a diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S index da47756e..ffbea043 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S @@ -13,15 +13,9 @@ * General Public License for more details. */ -#define MAX_SPANS 512 -#define MAX_BLOCKS 64 -#define MAX_BLOCKS_PER_ROW 128 - -#define RENDER_STATE_MASK_EVALUATE 0x20 -#define RENDER_FLAGS_MODULATE_TEXELS 0x1 -#define RENDER_FLAGS_BLEND 0x2 #define RENDER_INTERLACE_ENABLED 0x1 +#include "psx_gpu.h" #include "psx_gpu_offsets.h" #define psx_gpu_b_dx_offset (psx_gpu_b_block_span_offset + 4) @@ -228,7 +222,6 @@ #ifdef __MACH__ #define flush_render_block_buffer _flush_render_block_buffer -#define setup_sprite_untextured_simple _setup_sprite_untextured_simple #define update_texture_8bpp_cache _update_texture_8bpp_cache #endif @@ -369,8 +362,8 @@ function(compute_all_gradients) sub r14, r14, #(62 - 12) @ r14 = shift - (62 - FIXED_BITS) vshll.u16 uvrg_base, uvrg0, #16 @ uvrg_base = uvrg0 << 16 - vdup.u32 r_shift, r14 @ r_shift = { shift, shift, shift, shift } - + vdup.u32 r_shift, r14 @ r_shift = { shift, shift*, shift, shift* } + @ * - vshl.u64: ignored by hw vadd.u32 uvrg_base, uvrgb_phase vabs.s32 ga_uvrg_x, ga_uvrg_x @ ga_uvrg_x = abs(ga_uvrg_x) @@ -565,6 +558,8 @@ function(compute_all_gradients) #define left_x_32_low d22 #define left_x_32_high d23 +#define tmp_max_blocks d20 + #define edges_xy q0 #define edges_dx_dy d2 #define edge_shifts d3 @@ -819,8 +814,10 @@ function(compute_all_gradients) str b, [span_b_offset], #4; \ setup_spans_adjust_interpolants_##direction(); \ \ + vmov.u16 tmp_max_blocks, #MAX_BLOCKS_PER_ROW; \ vshr.u16 left_right_x_16_high, left_right_x_16_high, #3; \ vshl.u16 span_shifts, c_0xFFFE, span_shifts; \ + vmin.u16 left_right_x_16_high, left_right_x_16_high, tmp_max_blocks; \ \ vst4.u16 { left_right_x_16, span_shifts_y }, [span_edge_data]!; \ \ @@ -867,8 +864,10 @@ function(compute_all_gradients) str b, [span_b_offset], #4; \ setup_spans_adjust_interpolants_##direction(); \ \ - vshl.u16 span_shifts, c_0xFFFE, span_shifts; \ + vmov.u16 tmp_max_blocks, #MAX_BLOCKS_PER_ROW; \ vshr.u16 left_right_x_16_high, left_right_x_16_high, #3; \ + vshl.u16 span_shifts, c_0xFFFE, span_shifts; \ + vmin.u16 left_right_x_16_high, left_right_x_16_high, tmp_max_blocks; \ \ vst4.u16 { left_right_x_16, span_shifts_y }, [span_edge_data]!; \ \ @@ -908,7 +907,9 @@ function(compute_all_gradients) ble 1f; \ \ orr temp, y_a, y_a, lsl #16; \ + cmp height, #512; \ add temp, temp, #(1 << 16); \ + movgt height, #512; \ add y_a, temp, #2; \ add y_a, y_a, #(2 << 16); \ vmov y_x4, temp, y_a; \ @@ -963,7 +964,9 @@ function(compute_all_gradients) ble 1f; \ \ orr temp, y_a, y_a, lsl #16; \ + cmp height, #512; \ sub temp, temp, #(1 << 16); \ + movgt height, #512; \ sub y_a, temp, #2; \ sub y_a, y_a, #(2 << 16); \ vmov y_x4, temp, y_a; \ @@ -5826,14 +5829,7 @@ function(setup_sprite_16bpp_4x) .align 3 -function(setup_sprite_untextured) - ldrh r12, [psx_gpu, #psx_gpu_render_state_offset] - tst r12, #(RENDER_STATE_MASK_EVALUATE | RENDER_FLAGS_MODULATE_TEXELS \ - | RENDER_FLAGS_BLEND) - ldrbeq r12, [psx_gpu, #psx_gpu_render_mode_offset] - tsteq r12, #RENDER_INTERLACE_ENABLED - beq setup_sprite_untextured_simple - +function(setup_sprite_untextured_512) stmdb sp!, { r4 - r11, r14 } ldr width, [sp, #40] @@ -6133,6 +6129,7 @@ function(scale2x_tiles8) mov r14, r2 0: + pld [r1, #1024*2] vld1.u16 { q0 }, [r1, :128]! vld1.u16 { q2 }, [r1, :128]! vmov q1, q0