X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=blobdiff_plain;f=plugins%2Fgpu_neon%2Fpsx_gpu%2Fpsx_gpu_arm_neon.S;h=6108bc35b26d12951b457f9c55776b0bd17f2015;hp=fd9979808f81814a44c6e56e58dd846eb48b71fb;hb=ad38f92fe406f2f0c9008e5a85d7e02a6410f9e5;hpb=3867c6efed8d1cd6cd40f07cd46876f59da8912f diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S index fd997980..6108bc35 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S @@ -241,6 +241,18 @@ .align 4 +/* FIXME: users of this should be in psx_gpu instead */ +#ifndef __PIC__ +#define load_pointer(register, pointer) \ + movw register, :lower16:pointer; \ + movt register, :upper16:pointer; \ + +#else +#define load_pointer(register, pointer) \ + ldr register, =pointer \ + +#endif + #define function(name) \ .global name; \ name: \ @@ -609,8 +621,7 @@ function(compute_all_gradients) vld1.32 { uvrg }, [ temp ]; \ add temp, psx_gpu, #psx_gpu_uvrg_dy_offset; \ vld1.32 { uvrg_dy }, [ temp ]; \ - movw reciprocal_table_ptr, :lower16:reciprocal_table; \ - movt reciprocal_table_ptr, :upper16:reciprocal_table; \ + load_pointer(reciprocal_table_ptr, reciprocal_table); \ \ vmov.u32 c_0x01, #0x01 \ @@ -1016,6 +1027,7 @@ function(setup_spans_up_left) function(setup_spans_up_right) setup_spans_up_up(right, left) +.pool #define setup_spans_down_down(minor, major) \ setup_spans_prologue(); \ @@ -1224,6 +1236,7 @@ function(setup_spans_up_down) setup_spans_prologue_b() bal 4b +.pool #undef span_uvrg_offset #undef span_edge_data @@ -2002,8 +2015,6 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_direct) 1: add span_edge_data, span_edge_data, #8 subs num_spans, num_spans, #1 - - strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ] bne 0b ldmia sp!, { r4 - r11, pc } @@ -4339,102 +4350,52 @@ function(warmup) bx lr +#undef vram_ptr #undef color -#undef y +#undef width #undef height - -#define psx_gpu r0 -#define color r1 -#define x r2 -#define y r3 +#undef pitch #define vram_ptr r0 -#define width r3 -#define height r12 - -#define parameter_width_offset 0 -#define parameter_height_offset 4 +#define color r1 +#define width r2 +#define height r3 -#define color_r r14 -#define color_g r4 -#define color_b r5 +#define pitch r1 -#define left_unaligned r14 -#define right_unaligned r4 -#define pitch r5 -#define num_unaligned r2 -#define num_width r6 +#define num_width r12 -#undef colors +#undef colors_a +#undef colors_b -#define colors q0 +#define colors_a q0 +#define colors_b q1 .align 3 function(render_block_fill_body) - ldr vram_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ] - ldr height, [ sp, #parameter_height_offset ] - - add vram_ptr, vram_ptr, y, lsl #11 - ldr width, [ sp, #parameter_width_offset ] - - add vram_ptr, vram_ptr, x, lsl #1 - stmdb sp!, { r4 - r6, r14 } - - ubfx color_r, color, #3, #5 - ubfx color_g, color, #11, #5 - - ubfx color_b, color, #19, #5 - orr color, color_r, color_g, lsl #5 - - orr color, color, color_b, lsl #10 - add left_unaligned, x, #0x7 - - bic left_unaligned, left_unaligned, #0x7 - vdup.u16 colors, color - - sub left_unaligned, left_unaligned, x + vdup.u16 colors_a, color mov pitch, #2048 + vmov colors_b, colors_a sub pitch, pitch, width, lsl #1 - sub width, width, left_unaligned - - and right_unaligned, width, #0x7 - bic width, width, #0x7 - 0: - mov num_width, width, lsr #3 + mov num_width, width - movs num_unaligned, left_unaligned - beq 2f + 0: + vst1.u32 { colors_a, colors_b }, [ vram_ptr, :256 ]! - 1: - strh color, [ vram_ptr ], #2 - - subs num_unaligned, num_unaligned, #1 - bne 1b - - 2: - vst1.u32 { colors }, [ vram_ptr, :128 ]! - subs num_width, num_width, #1 - bne 2b - - movs num_unaligned, right_unaligned - beq 4f - - 3: - strh color, [ vram_ptr ], #2 - - subs num_unaligned, num_unaligned, #1 - bne 3b + subs num_width, num_width, #2 + bne 0b - 4: add vram_ptr, vram_ptr, pitch + mov num_width, width + subs height, height, #1 bne 0b - - ldmia sp!, { r4 - r6, pc } + bx lr + #undef x #undef y