spu: threaded implementation
[pcsx_rearmed.git] / plugins / dfsound / arm_utils.S
CommitLineData
/*
 * (C) Gražvydas "notaz" Ignotas, 2011
 *
 * This work is licensed under the terms of any of these licenses
 * (at your option):
 *  - GNU GPL, version 2 or later.
 *  - GNU LGPL, version 2.1 or later.
 * See the COPYING file in the top-level directory.
 */
10
665f33e1 11#include "arm_features.h"
b17618c0 12
#ifdef __MACH__
@ Mach-O builds only: word-sized slots holding the addresses of the
@ external buffers.  The Mach-O branch of load_varadr reads these slots
@ pc-relative instead of materialising the symbol address directly.
.data
.p2align 2
ptr_ChanBuf:    .word ESYM(ChanBuf)
ptr_SSumLR:     .word ESYM(SSumLR)
#endif
19
.text
.p2align 2

@ load_varadr reg, var
@
@ Put the address of the external symbol var into register reg.
@ Only reg is written.  Three strategies, chosen at preprocessing time:
@   1. ARMv7-A without PIC: absolute address built by a movw/movt pair.
@   2. ARMv7-A on Mach-O (reached when PIC is defined): a movw/movt pair
@      builds the distance from the ldr below to the ptr_<var> slot, and
@      the ldr fetches the address stored there.  The +8 accounts for pc
@      reading as the instruction address plus 8 in ARM state.  A
@      ptr_<var> slot must exist for every symbol used this way.
@   3. Anything else: literal-pool load through the ldr = pseudo-op.
.macro load_varadr reg var
#if defined(__ARM_ARCH_7A__) && !defined(__PIC__)
    movw        \reg, #:lower16:ESYM(\var)
    movt        \reg, #:upper16:ESYM(\var)
#elif defined(__ARM_ARCH_7A__) && defined(__MACH__)
    movw        \reg, #:lower16:(ptr_\var-(7001f+8))
    movt        \reg, #:upper16:(ptr_\var-(7001f+8))
7001:
    ldr         \reg, [pc, \reg]
#else
    ldr         \reg, =ESYM(\var)
#endif
.endm
36
37#ifdef __ARM_NEON__
b17618c0 38
@ void mix_chan(int start, int count, int lv, int rv)          [NEON]
@
@ Mixes count 32-bit samples of ChanBuf, beginning at index start,
@ into the interleaved left/right accumulators of SSumLR:
@     SSumLR[2*i + 0] += (ChanBuf[i] * lv) >> 14
@     SSumLR[2*i + 1] += (ChanBuf[i] * rv) >> 14
@
@ In:     r0 = start, r1 = count, r2 = lv, r3 = rv
@ Clobb:  r0-r3, r12, d0-d5, d16-d20, flags
@
@ Four samples are handled per pass.  A final partial pass still LOADS
@ four samples and four accumulator pairs, but stores only the pairs
@ that belong to the remaining 1..3 samples.
@ NOTE(review): a count of zero or less is not special-cased and would
@ still write one pair; presumably callers never pass that - confirm.
@
@ Only caller-saved NEON registers (d0-d7, d16-d31) are used as
@ temporaries: d8-d15 are callee-saved under the AAPCS and must not be
@ modified without being preserved.  The by-scalar multiplies keep their
@ scalar operand in d0/d1, as the 32-bit scalar form requires d0-d15.
FUNCTION(mix_chan): @ (int start, int count, int lv, int rv)
    vmov.32     d20[0], r2
    vmov.32     d20[1], r3             @ d20 = {lv, rv} multipliers
    mov         r12, r0
    load_varadr r0, ChanBuf
    load_varadr r2, SSumLR
    add         r0, r12, lsl #2        @ r0 = &ChanBuf[start]
    add         r2, r12, lsl #3        @ r2 = &SSumLR[start * 2]
0:
    vldmia      r0!, {d0-d1}           @ 4 input samples
    vldmia      r2, {d2-d5}            @ 4 L/R accumulator pairs
    vmul.s32    d16, d20, d0[0]        @ {lv, rv} * sample 0
    vmul.s32    d17, d20, d0[1]
    vmul.s32    d18, d20, d1[0]
    vmul.s32    d19, d20, d1[1]
    vsra.s32    q1, q8, #14            @ acc += product >> 14
    vsra.s32    q2, q9, #14
    subs        r1, #4
    blt         mc_finish              @ fewer than 4 samples were left
    vstmia      r2!, {d2-d5}           @ flags from subs stay intact
    bgt         0b
    nop
    bxeq        lr

mc_finish:
    @ r1 is -3, -2 or -1 here, meaning 1, 2 or 3 samples remained
    vstmia      r2!, {d2}
    cmp         r1, #-2
    vstmiage    r2!, {d3}
    cmp         r1, #-1
    vstmiage    r2!, {d4}
    bx          lr
70
71
@ void mix_chan_rvb(int start, int count, int lv, int rv, int *rvb)  [NEON]
@
@ Same mixing as mix_chan, but every scaled sample pair is accumulated
@ into two interleaved left/right buffers, SSumLR and rvb:
@     SSumLR[2*i + 0] += (ChanBuf[i] * lv) >> 14
@     SSumLR[2*i + 1] += (ChanBuf[i] * rv) >> 14
@     rvb[2*i + 0]    += (ChanBuf[i] * lv) >> 14
@     rvb[2*i + 1]    += (ChanBuf[i] * rv) >> 14
@
@ In:     r0 = start, r1 = count, r2 = lv, r3 = rv, [sp] = rvb
@ Clobb:  r0-r3, r12, d0-d5, d16-d24, flags
@
@ Four samples are handled per pass.  A final partial pass still LOADS
@ four samples and four pairs from each buffer, but stores only the
@ pairs that belong to the remaining 1..3 samples.
@ NOTE(review): a count of zero or less is not special-cased and would
@ still write one pair per buffer; presumably never passed - confirm.
@
@ Only caller-saved NEON registers (d0-d7, d16-d31) are used as
@ temporaries: d8-d15 are callee-saved under the AAPCS and must not be
@ modified without being preserved.  The by-scalar multiplies keep their
@ scalar operand in d0/d1, as the 32-bit scalar form requires d0-d15.
FUNCTION(mix_chan_rvb): @ (int start, int count, int lv, int rv, int *rvb)
    vmov.32     d24[0], r2
    vmov.32     d24[1], r3             @ d24 = {lv, rv} multipliers
    mov         r12, r0
    load_varadr r0, ChanBuf
    ldr         r3, [sp]               @ rvb (nothing pushed, so 5th arg is at sp)
    load_varadr r2, SSumLR
    add         r0, r12, lsl #2        @ r0 = &ChanBuf[start]
    add         r2, r12, lsl #3        @ r2 = &SSumLR[start * 2]
    add         r3, r12, lsl #3        @ r3 = &rvb[start * 2]
0:
    vldmia      r0!, {d0-d1}           @ 4 input samples
    vldmia      r2, {d2-d5}            @ 4 L/R pairs of SSumLR
    vldmia      r3, {d16-d19}          @ 4 L/R pairs of rvb
    vmul.s32    d20, d24, d0[0]        @ {lv, rv} * sample 0
    vmul.s32    d21, d24, d0[1]
    vmul.s32    d22, d24, d1[0]
    vmul.s32    d23, d24, d1[1]
    vsra.s32    q1, q10, #14           @ SSumLR pairs += product >> 14
    vsra.s32    q2, q11, #14
    vsra.s32    q8, q10, #14           @ rvb pairs += product >> 14
    vsra.s32    q9, q11, #14
    subs        r1, #4
    blt         mcr_finish             @ fewer than 4 samples were left
    vstmia      r2!, {d2-d5}           @ flags from subs stay intact
    vstmia      r3!, {d16-d19}
    bgt         0b
    nop
    bxeq        lr

mcr_finish:
    @ r1 is -3, -2 or -1 here, meaning 1, 2 or 3 samples remained
    vstmia      r2!, {d2}
    vstmia      r3!, {d16}
    cmp         r1, #-2
    vstmiage    r2!, {d3}
    vstmiage    r3!, {d17}
    cmp         r1, #-1
    vstmiage    r2!, {d4}
    vstmiage    r3!, {d18}
    bx          lr
112
665f33e1 113#elif defined(HAVE_ARMV5)
3a721c1f 114
@ void mix_chan(int start, int count, int lv, int rv)          [ARMv5]
@
@ Adds the volume-scaled samples ChanBuf[start ...] to the interleaved
@ left/right words at SSumLR[2*start ...], two samples per pass.
@
@ In:     r0 = start, r1 = count, r2 = lv, r3 = rv
@ Saved:  r4-r8, lr (pushed on entry, restored on return)
@ Clobb:  r0-r3, r12, flags
@
@ smlawb/smlawt compute ((Rn * 16-bit half of Rm) >> 16) + Ra.  Sample
@ and multiplier are each pre-shifted left by one, so the net effect is
@ acc += (sample * volume) >> 14.
@ NOTE(review): packing both volumes into one register assumes each one
@ fits its 16-bit half even after the shift by one - confirm the range
@ against the callers.
@ NOTE(review): a count of zero or less is not special-cased and would
@ still write one pair.
FUNCTION(mix_chan): @ (int start, int count, int lv, int rv)
    stmfd       sp!, {r4-r8,lr}
    orr         r3, r2, r3, lsl #16    @ r3 = lv | (rv << 16)
    mov         r12, r0                @ r12 = start
    lsl         r3, r3, #1             @ both multipliers << 1
    load_varadr r0, ChanBuf
    load_varadr r2, SSumLR
    add         r0, r0, r12, lsl #2    @ r0 = &ChanBuf[start]
    add         r2, r2, r12, lsl #3    @ r2 = &SSumLR[start * 2]
0:
    ldmia       r0!, {r4,r5}           @ two samples
    ldmia       r2, {r6-r8,lr}         @ L0, R0, L1, R1 accumulators
    lsl         r4, r4, #1             @ pre-shift samples for the multiply
    lsl         r5, r5, #1
    smlawb      r6, r4, r3, r6         @ L0 += sample0 * lv
    smlawt      r7, r4, r3, r7         @ R0 += sample0 * rv
    smlawb      r8, r5, r3, r8         @ L1 += sample1 * lv
    smlawt      lr, r5, r3, lr         @ R1 += sample1 * rv
    subs        r1, r1, #2
    blt         1f                     @ only one sample was left
    stmia       r2!, {r6-r8,lr}
    bgt         0b
    ldmeqfd     sp!, {r4-r8,pc}        @ count used up exactly: return

1:
    stmia       r2!, {r6,r7}           @ odd tail: store the first pair only
    ldmfd       sp!, {r4-r8,pc}
142
143
@ void mix_chan_rvb(int start, int count, int lv, int rv, int *rvb)  [ARMv5]
@
@ Adds the volume-scaled samples ChanBuf[start ...] to the interleaved
@ left/right words of both SSumLR and rvb, one sample per pass.
@
@ In:     r0 = start, r1 = count, r2 = lv, r3 = rv, 5th argument = rvb
@ Saved:  r4-r8, lr (pushed on entry, restored on return)
@ Clobb:  r0-r3, r12, flags
@
@ smlawb/smlawt compute ((Rn * 16-bit half of Rm) >> 16) + Ra.  Sample
@ and multiplier are each pre-shifted left by one, so the net effect is
@ acc += (sample * volume) >> 14.
@ NOTE(review): packing both volumes into one register assumes each one
@ fits its 16-bit half even after the shift by one - confirm the range
@ against the callers.
@ NOTE(review): the loop body runs before count is tested, so a count
@ of zero or less still mixes one sample.
FUNCTION(mix_chan_rvb): @ (int start, int count, int lv, int rv, int *rvb)
    stmfd       sp!, {r4-r8,lr}
    orr         lr, r2, r3, lsl #16    @ lr = lv | (rv << 16)
    lsl         lr, #1                 @ both multipliers << 1
    @ The stmfd above pushed six words, so the stack-passed 5th argument
    @ now sits 6*4 bytes above sp; [sp] itself holds the saved r4.
    ldr         r3, [sp, #6*4]         @ rvb
    load_varadr r2, SSumLR
    load_varadr r4, ChanBuf
    add         r2, r2, r0, lsl #3     @ r2 = &SSumLR[start * 2]
    add         r3, r3, r0, lsl #3     @ r3 = &rvb[start * 2]
    add         r0, r4, r0, lsl #2     @ r0 = &ChanBuf[start]
0:
    ldr         r4, [r0], #4           @ next sample
    ldmia       r2, {r6,r7}            @ SSumLR left/right
    ldmia       r3, {r8,r12}           @ rvb left/right
    lsl         r4, #1                 @ pre-shift sample for the multiply
    smlawb      r6, r4, lr, r6         @ supposedly takes single cycle?
    smlawt      r7, r4, lr, r7
    smlawb      r8, r4, lr, r8
    smlawt      r12,r4, lr, r12
    subs        r1, #1
    stmia       r2!, {r6,r7}
    stmia       r3!, {r8,r12}
    bgt         0b
    ldmfd       sp!, {r4-r8,pc}
168
c67af2ac 169#endif
3a721c1f 170
b17618c0 171@ vim:filetype=armasm