tiny little android neon build fix
[pcsx_rearmed.git] / plugins / dfsound / arm_utils.S
CommitLineData
b17618c0 1/*
2 * (C) Gražvydas "notaz" Ignotas, 2011
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
665f33e1 11#include "arm_features.h"
b17618c0 12
13.text
14.align 2
15
/*
 * load_varadr reg, var
 *
 * Places the address of EVAR(var) in reg.
 * Builds that are ARMv7-A and not position independent use a movw/movt
 * immediate pair; every other build uses the "ldr reg, =symbol"
 * pseudo-instruction.
 */
.macro load_varadr reg var
#if !defined(__ARM_ARCH_7A__) || defined(__PIC__)
    ldr         \reg, =EVAR(\var)
#else
    movw        \reg, #:lower16:EVAR(\var)
    movt        \reg, #:upper16:EVAR(\var)
#endif
.endm
24
25#ifdef __ARM_NEON__
b17618c0 26
/*
 * mix_chan(int start, int count, int lv, int rv)        (NEON version)
 *
 * For each 32-bit sample s read from ChanBuf starting at index start,
 * adds (s * lv) >> 14 and (s * rv) >> 14 to the matching interleaved
 * pair of 32-bit words in SSumLR (pair index = sample index).
 *
 * In:      r0 = start, r1 = count, r2 = lv, r3 = rv
 * Clobber: r0-r3, r12, d0-d6, q10-q11, flags
 *
 * Only caller-saved NEON registers are used (d0-d7 and d16-d31), so the
 * callee-saved d8-d15 keep their values across the call as the AAPCS
 * requires.
 *
 * Each pass loads and multiplies four samples. When fewer than four
 * remain, the tail at mc_finish stores only the pairs that belong to
 * the requested count.
 * NOTE(review): count is assumed to be >= 1; with count == 0 one pair
 * is still written. Confirm against callers.
 */
FUNCTION(mix_chan): @ (int start, int count, int lv, int rv)
    vmov.32     d6[0], r2
    vmov.32     d6[1], r3               @ d6 = {lv, rv} multipliers
    mov         r12, r0                 @ keep start, r0 becomes a pointer
    load_varadr r0, ChanBuf
    load_varadr r2, SSumLR
    add         r0, r0, r12, lsl #2     @ r0 = ChanBuf + start * 4
    add         r2, r2, r12, lsl #3     @ r2 = SSumLR + start * 8
0:
    vldmia      r0!, {d0-d1}            @ four samples
    vldmia      r2, {d2-d5}             @ four L/R accumulator pairs
    vmul.s32    d20, d6, d0[0]          @ {lv, rv} * sample 0
    vmul.s32    d21, d6, d0[1]
    vmul.s32    d22, d6, d1[0]
    vmul.s32    d23, d6, d1[1]
    vsra.s32    q1, q10, #14            @ accumulators += products >> 14
    vsra.s32    q2, q11, #14
    subs        r1, r1, #4
    blt         mc_finish               @ fewer than four were requested
    vstmia      r2!, {d2-d5}
    bgt         0b
    nop                                 @ NOTE(review): purpose not evident here, kept as is
    bxeq        lr                      @ count reached exactly zero

mc_finish:
    @ r1 is -3, -2 or -1 here, meaning 1, 2 or 3 pairs are still due
    vstmia      r2!, {d2}
    cmp         r1, #-2
    vstmiage    r2!, {d3}
    cmp         r1, #-1
    vstmiage    r2!, {d4}
    bx          lr
58
59
/*
 * mix_chan_rvb(int start, int count, int lv, int rv)    (NEON version)
 *
 * For each 32-bit sample s read from ChanBuf starting at index start,
 * adds (s * lv) >> 14 and (s * rv) >> 14 to the matching interleaved
 * pair of 32-bit words in SSumLR, and adds the same two values to the
 * matching pair in the buffer whose address is stored in sRVBStart.
 *
 * In:      r0 = start, r1 = count, r2 = lv, r3 = rv
 * Clobber: r0-r3, r12, d0-d6, q8-q11, flags
 *
 * Only caller-saved NEON registers are used (d0-d7 and d16-d31), so the
 * callee-saved d8-d15 keep their values across the call as the AAPCS
 * requires.
 *
 * Each pass loads and multiplies four samples. When fewer than four
 * remain, the tail at mcr_finish stores only the pairs that belong to
 * the requested count.
 * NOTE(review): count is assumed to be >= 1; with count == 0 one pair
 * is still written to each buffer. Confirm against callers.
 */
FUNCTION(mix_chan_rvb): @ (int start, int count, int lv, int rv)
    vmov.32     d6[0], r2
    vmov.32     d6[1], r3               @ d6 = {lv, rv} multipliers
    mov         r12, r0                 @ keep start, r0 becomes a pointer
    load_varadr r0, ChanBuf
    load_varadr r3, sRVBStart
    load_varadr r2, SSumLR
    ldr         r3, [r3]                @ r3 = pointer held in sRVBStart
    add         r0, r0, r12, lsl #2     @ r0 = ChanBuf + start * 4
    add         r2, r2, r12, lsl #3     @ r2 = SSumLR + start * 8
    add         r3, r3, r12, lsl #3     @ r3 = reverb buffer + start * 8
0:
    vldmia      r0!, {d0-d1}            @ four samples
    vldmia      r2, {d2-d5}             @ four SSumLR pairs
    vldmia      r3, {d16-d19}           @ four reverb pairs
    vmul.s32    d20, d6, d0[0]          @ {lv, rv} * sample 0
    vmul.s32    d21, d6, d0[1]
    vmul.s32    d22, d6, d1[0]
    vmul.s32    d23, d6, d1[1]
    vsra.s32    q1, q10, #14            @ SSumLR pairs += products >> 14
    vsra.s32    q2, q11, #14
    vsra.s32    q8, q10, #14            @ reverb pairs += products >> 14
    vsra.s32    q9, q11, #14
    subs        r1, r1, #4
    blt         mcr_finish              @ fewer than four were requested
    vstmia      r2!, {d2-d5}
    vstmia      r3!, {d16-d19}
    bgt         0b
    nop                                 @ NOTE(review): purpose not evident here, kept as is
    bxeq        lr                      @ count reached exactly zero

mcr_finish:
    @ r1 is -3, -2 or -1 here, meaning 1, 2 or 3 pairs are still due
    vstmia      r2!, {d2}
    vstmia      r3!, {d16}
    cmp         r1, #-2
    vstmiage    r2!, {d3}
    vstmiage    r3!, {d17}
    cmp         r1, #-1
    vstmiage    r2!, {d4}
    vstmiage    r3!, {d18}
    bx          lr
101
665f33e1 102#elif defined(HAVE_ARMV5)
3a721c1f 103
/*
 * mix_chan(int start, int count, int lv, int rv)        (ARMv5 version)
 *
 * Two samples per pass: each 32-bit sample read from ChanBuf (from
 * index start) is doubled and multiplied with the doubled lv and rv
 * through smlawb/smlawt, and the results are accumulated into the
 * matching interleaved pair of 32-bit words in SSumLR. The two
 * doublings combined with the 16-bit shift inside smlaw give a net
 * right shift of 14 on sample * volume.
 *
 * In:      r0 = start, r1 = count, r2 = lv, r3 = rv
 * Clobber: r0-r3, r12, flags (r4-r8 and lr are saved and restored)
 *
 * NOTE(review): lv and rv are packed as lv | (rv << 16), so any bits of
 * lv above bit 15 would leak into the rv half. Presumably callers pass
 * values that fit; confirm.
 * NOTE(review): count is assumed to be >= 1; with count < 1 one pair is
 * still written. Confirm against callers.
 */
FUNCTION(mix_chan): @ (int start, int count, int lv, int rv)
    stmfd       sp!, {r4-r8,lr}
    orr         r3, r2, r3, lsl #16     @ r3 = lv | (rv << 16)
    mov         r12, r0                 @ keep start, r0 becomes a pointer
    mov         r3, r3, lsl #1          @ packed multipliers << 1
    load_varadr r2, SSumLR
    load_varadr r0, ChanBuf
    add         r2, r2, r12, lsl #3     @ r2 = SSumLR + start * 8
    add         r0, r0, r12, lsl #2     @ r0 = ChanBuf + start * 4
0:
    ldmia       r2, {r6-r8,lr}          @ two L/R accumulator pairs
    ldmia       r0!, {r4,r5}            @ two samples
    mov         r5, r5, lsl #1          @ adjust for mul
    mov         r4, r4, lsl #1
    smlawb      r6, r4, r3, r6          @ left  += sample 0 * lv
    smlawt      r7, r4, r3, r7          @ right += sample 0 * rv
    smlawb      r8, r5, r3, r8          @ left  += sample 1 * lv
    smlawt      lr, r5, r3, lr          @ right += sample 1 * rv
    subs        r1, r1, #2
    blt         mc_finish               @ only one sample was still due
    stmia       r2!, {r6-r8,lr}
    bgt         0b
    ldmeqfd     sp!, {r4-r8,pc}         @ count reached exactly zero

mc_finish:
    stmia       r2!, {r6,r7}            @ store just the first pair
    ldmfd       sp!, {r4-r8,pc}
131
132
/*
 * mix_chan_rvb(int start, int count, int lv, int rv)    (ARMv5 version)
 *
 * One sample per pass: each 32-bit sample read from ChanBuf (from index
 * start) is doubled and multiplied with the doubled lv and rv through
 * smlawb/smlawt. The results are accumulated into the matching
 * interleaved pair of 32-bit words in SSumLR and into the matching pair
 * of the buffer whose address is stored in sRVBStart.
 *
 * In:      r0 = start, r1 = count, r2 = lv, r3 = rv
 * Clobber: r0-r3, r12, flags (r4-r8 and lr are saved and restored)
 *
 * NOTE(review): lv and rv are packed as lv | (rv << 16), so any bits of
 * lv above bit 15 would leak into the rv half. Presumably callers pass
 * values that fit; confirm.
 * NOTE(review): the loop body runs before count is tested, so one
 * sample is processed even when count < 1. Confirm against callers.
 */
FUNCTION(mix_chan_rvb): @ (int start, int count, int lv, int rv)
    stmfd       sp!, {r4-r8,lr}
    orr         lr, r2, r3, lsl #16     @ lr = lv | (rv << 16)
    mov         lr, lr, lsl #1          @ packed multipliers << 1
    load_varadr r4, ChanBuf
    load_varadr r2, SSumLR
    load_varadr r3, sRVBStart
    ldr         r3, [r3]                @ r3 = pointer held in sRVBStart
    add         r2, r2, r0, lsl #3      @ r2 = SSumLR + start * 8
    add         r3, r3, r0, lsl #3      @ r3 = reverb buffer + start * 8
    add         r0, r4, r0, lsl #2      @ r0 = ChanBuf + start * 4
0:
    ldr         r4, [r0], #4            @ next sample
    ldmia       r3, {r8,r12}            @ reverb L/R pair
    ldmia       r2, {r6,r7}             @ SSumLR L/R pair
    mov         r4, r4, lsl #1          @ adjust for mul
    smlawb      r6, r4, lr, r6          @ supposedly takes single cycle?
    smlawt      r7, r4, lr, r7
    smlawb      r8, r4, lr, r8
    smlawt      r12, r4, lr, r12
    subs        r1, r1, #1
    stmia       r2!, {r6,r7}
    stmia       r3!, {r8,r12}
    bgt         0b
    ldmfd       sp!, {r4-r8,pc}
158
c67af2ac 159#endif
3a721c1f 160
b17618c0 161@ vim:filetype=armasm