/*
 * (C) Gražvydas "notaz" Ignotas, 2011
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */

#include "arm_features.h"

#ifdef TEXRELS_FORBIDDEN
.data
.align 2
ptr_ChanBuf: .word ESYM(ChanBuf)
#endif

.text
.align 2

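@ load_varadr: load the address of \var into \reg. With ARMv7 and
@ TEXRELS_FORBIDDEN it reads the address from the ptr_\var word in .data
@ (PC-relative, so no text relocations are needed); plain non-PIC ARMv7
@ uses movw/movt; otherwise it falls back to a literal pool load.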
.macro load_varadr reg var
#if defined(HAVE_ARMV7) && defined(TEXRELS_FORBIDDEN)
    movw        \reg, #:lower16:(ptr_\var-(1678f+8))
    movt        \reg, #:upper16:(ptr_\var-(1678f+8))
1678:
    ldr         \reg, [pc, \reg]
#elif defined(HAVE_ARMV7) && !defined(__PIC__)
    movw        \reg, #:lower16:ESYM(\var)
    movt        \reg, #:upper16:ESYM(\var)
#else
    ldr         \reg, =ESYM(\var)
#endif
.endm

#ifdef __ARM_NEON__

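@ mix_chan: mix 'count' mono samples from ChanBuf into the interleaved
@ stereo accumulator SSumLR, scaling by the left/right volumes, i.e. for
@ each sample s:
@   SSumLR[2*i+0] += (s * lv) >> 14;  SSumLR[2*i+1] += (s * rv) >> 14
@ Four samples are handled per iteration; the tail stores count % 4 pairs.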
FUNCTION(mix_chan): @ (int *SSumLR, int count, int lv, int rv)
    vmov.32     d14[0], r2
    vmov.32     d14[1], r3         @ multipliers
    mov         r2, r0
    load_varadr r0, ChanBuf
0:
    vldmia      r0!, {d0-d1}
    vldmia      r2, {d2-d5}
    vmul.s32    d10, d14, d0[0]
    vmul.s32    d11, d14, d0[1]
    vmul.s32    d12, d14, d1[0]
    vmul.s32    d13, d14, d1[1]
    vsra.s32    q1, q5, #14
    vsra.s32    q2, q6, #14
    subs        r1, #4
    blt         mc_finish
    vstmia      r2!, {d2-d5}
    bgt         0b
    nop
    bxeq        lr

mc_finish:
    vstmia      r2!, {d2}
    cmp         r1, #-2
    vstmiage    r2!, {d3}
    cmp         r1, #-1
    vstmiage    r2!, {d4}
    bx          lr


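@ mix_chan_rvb: same as mix_chan, but the scaled samples are additionally
@ accumulated into the reverb buffer 'rvb' (5th argument, passed on the stack).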
FUNCTION(mix_chan_rvb): @ (int *SSumLR, int count, int lv, int rv, int *rvb)
    vmov.32     d14[0], r2
    vmov.32     d14[1], r3         @ multipliers
    mov         r2, r0
    load_varadr r0, ChanBuf
    ldr         r3, [sp]           @ rvb
0:
    vldmia      r0!, {d0-d1}
    vldmia      r2, {d2-d5}
    vldmia      r3, {d6-d9}
    vmul.s32    d10, d14, d0[0]
    vmul.s32    d11, d14, d0[1]
    vmul.s32    d12, d14, d1[0]
    vmul.s32    d13, d14, d1[1]
    vsra.s32    q1, q5, #14
    vsra.s32    q2, q6, #14
    vsra.s32    q3, q5, #14
    vsra.s32    q4, q6, #14
    subs        r1, #4
    blt         mcr_finish
    vstmia      r2!, {d2-d5}
    vstmia      r3!, {d6-d9}
    bgt         0b
    nop
    bxeq        lr

mcr_finish:
    vstmia      r2!, {d2}
    vstmia      r3!, {d6}
    cmp         r1, #-2
    vstmiage    r2!, {d3}
    vstmiage    r3!, {d7}
    cmp         r1, #-1
    vstmiage    r2!, {d4}
    vstmiage    r3!, {d8}
    bx          lr

#elif defined(HAVE_ARMV5)

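@ ARMv5E version of mix_chan: lv/rv are packed into one register and both
@ the volumes and the samples are pre-shifted left by 1 so that smlawb/smlawt,
@ which add ((Rn * vol16) >> 16) to the accumulator, give the same
@ (s * vol) >> 14 scaling as the NEON code. Two samples per iteration.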
FUNCTION(mix_chan): @ (int *SSumLR, int count, int lv, int rv)
    stmfd       sp!, {r4-r8,lr}
    orr         r3, r2, r3, lsl #16
    lsl         r3, #1             @ packed multipliers << 1
    mov         r2, r0
    load_varadr r0, ChanBuf
0:
    ldmia       r0!, {r4,r5}
    ldmia       r2, {r6-r8,lr}
    lsl         r4, #1             @ adjust for mul
    lsl         r5, #1
    smlawb      r6, r4, r3, r6
    smlawt      r7, r4, r3, r7
    smlawb      r8, r5, r3, r8
    smlawt      lr, r5, r3, lr
    subs        r1, #2
    blt         mc_finish
    stmia       r2!, {r6-r8,lr}
    bgt         0b
    ldmeqfd     sp!, {r4-r8,pc}

mc_finish:
    stmia       r2!, {r6,r7}
    ldmfd       sp!, {r4-r8,pc}


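@ ARMv5E version of mix_chan_rvb: one sample per iteration, accumulated into
@ both SSumLR (r2) and the reverb buffer (r3) with the packed volumes in lr.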
FUNCTION(mix_chan_rvb): @ (int *SSumLR, int count, int lv, int rv, int *rvb)
    stmfd       sp!, {r4-r8,lr}
    orr         lr, r2, r3, lsl #16
    lsl         lr, #1
    mov         r2, r0
    load_varadr r0, ChanBuf
    ldr         r3, [sp, #6*4]     @ rvb
0:
    ldr         r4, [r0], #4
    ldmia       r2, {r6,r7}
    ldmia       r3, {r8,r12}
    lsl         r4, #1
    smlawb      r6, r4, lr, r6     @ supposedly takes single cycle?
    smlawt      r7, r4, lr, r7
    smlawb      r8, r4, lr, r8
    smlawt      r12,r4, lr, r12
    subs        r1, #1
    stmia       r2!, {r6,r7}
    stmia       r3!, {r8,r12}
    bgt         0b
    ldmfd       sp!, {r4-r8,pc}

#endif

@ vim:filetype=armasm