X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=blobdiff_plain;f=libpcsxcore%2Fgte_neon.S;h=fe153e204a8315bc32bb57674eab86f2a5903acc;hp=d83cf2321bb22354647d5c510846f69d177c41be;hb=bdd050c3ed792381df2e744fee5b7ee80b93fd68;hpb=a53073ec52f5944c7833e176fec5ac49dc9e1e45 diff --git a/libpcsxcore/gte_neon.S b/libpcsxcore/gte_neon.S index d83cf232..fe153e20 100644 --- a/libpcsxcore/gte_neon.S +++ b/libpcsxcore/gte_neon.S @@ -5,29 +5,13 @@ * See the COPYING file in the top-level directory. */ +#include "arm_features.h" +#include "new_dynarec/linkage_offsets.h" .syntax unified - -.bss -.align 6 @ cacheline - -scratch: -.rept 8*8*2/4 - .word 0 -.endr - .text .align 2 -.macro ldr_scratch rd -#ifndef __PIC__ - movw \rd, #:lower16:scratch - movt \rd, #:upper16:scratch -#else - ldr \rd, =scratch -#endif -.endm - @ XXX: gteMAC calc shouldn't be saturating, but it is here @ approximate gteMAC|123 flags @@ -145,12 +129,11 @@ scratch: vqmovn.s32 d10, q4 @ gteIR|123; losing 2 cycles? .endm -.global gteRTPS_neon @ r0=CP2 (d,c), -gteRTPS_neon: +FUNCTION(gteRTPS_neon): @ r0=CP2 (d,c), push {r4-r6,lr} @ fmrx r4, fpscr @ vmrs? at least 40 cycle hit - ldr_scratch r1 + ldr r1, [r0, #LO_cop2_to_scratch_buf] mov r12, #0 vldmia r0, {d8} @ VXYZ(0) @@ -299,11 +282,10 @@ gteRTPS_neon: -.global gteRTPT_neon @ r0=CP2 (d,c), -gteRTPT_neon: +FUNCTION(gteRTPT_neon): @ r0=CP2 (d,c), push {r4-r11,lr} - ldr_scratch r1 + ldr r1, [r0, #LO_cop2_to_scratch_buf] mov r12, #0 rtpx_preload @@ -546,8 +528,7 @@ gteRTPT_neon: @ r4,r5 = VXYZ(v) packed @ r6 = &MX11(mx) @ r7 = &CV1(cv) -.global gteMVMVA_part_neon -gteMVMVA_part_neon: +FUNCTION(gteMVMVA_part_neon): uxth r5, r5 vmov.32 d8[0], r4 vmov.32 d8[1], r5 @ VXYZ(v) @@ -594,8 +575,7 @@ gteMVMVA_part_neon: @ get flags after gteMVMVA_part_neon operation -.global gteMACtoIR_flags_neon @ r0=CP2 (d,c), r1=lm -gteMACtoIR_flags_neon: +FUNCTION(gteMACtoIR_flags_neon): @ r0=CP2 (d,c), r1=lm push {r4,r5,lr} tst r1, r1 @ lm mov lr, #0 @ gteFLAG