spu: finish offload code to TI C64x DSP
[pcsx_rearmed.git] / plugins / dfsound / arm_utils.S
CommitLineData
b17618c0 1/*
2 * (C) Gražvydas "notaz" Ignotas, 2011
3 *
4 * This work is licensed under the terms of any of these licenses
5 * (at your option):
6 * - GNU GPL, version 2 or later.
7 * - GNU LGPL, version 2.1 or later.
8 * See the COPYING file in the top-level directory.
9 */
10
665f33e1 11#include "arm_features.h"
b17618c0 12
#ifdef __MACH__
/*
 * Mach-O only: one pointer-sized word per external variable, each holding
 * that variable's address.  The PC-relative branch of load_varadr reads
 * these words through the ptr_<name> labels.
 */
    .data
    .p2align 2
ptr_ChanBuf:
    .long       ESYM(ChanBuf)
ptr_SSumLR:
    .long       ESYM(SSumLR)
#endif
19
b17618c0 20.text
21.align 2
22
c67af2ac 23.macro load_varadr reg var
24#if defined(__ARM_ARCH_7A__) && !defined(__PIC__)
4ae83961 25 movw \reg, #:lower16:ESYM(\var)
26 movt \reg, #:upper16:ESYM(\var)
27#elif defined(__ARM_ARCH_7A__) && defined(__MACH__)
1f4e070a 28 movw \reg, #:lower16:(ptr_\var-(1678f+8))
29 movt \reg, #:upper16:(ptr_\var-(1678f+8))
4ae83961 301678:
31 ldr \reg, [pc, \reg]
c67af2ac 32#else
4ae83961 33 ldr \reg, =ESYM(\var)
c67af2ac 34#endif
35.endm
36
37#ifdef __ARM_NEON__
b17618c0 38
/*
 * mix_chan(int start, int count, int lv, int rv)            -- NEON variant
 *
 * In:    r0 = start  (sample index into ChanBuf / the SSumLR buffer)
 *        r1 = count  (number of samples to mix)
 *        r2 = lv, r3 = rv  (left / right multipliers)
 * Out:   nothing; for every sample s = ChanBuf[start + i]:
 *            sum[2*(start+i) + 0] += (s * lv) >> 14
 *            sum[2*(start+i) + 1] += (s * rv) >> 14
 *        where sum is the pointer stored in the SSumLR variable.
 * Clobb: r0-r3, r12, d0-d5, d16-d20, flags
 *
 * Only caller-saved NEON registers are used: d8-d15 must be preserved
 * across calls under AAPCS, so the products and the multiplier pair live
 * in d16-d20 rather than d10-d14.
 *
 * The loop handles four samples per pass.  It always runs at least once,
 * so the caller is expected to pass count > 0.
 */
FUNCTION(mix_chan): @ (int start, int count, int lv, int rv)
    vmov.32     d20[0], r2
    vmov.32     d20[1], r3             @ d20 = {lv, rv} multipliers
    load_varadr r2, SSumLR
    mov         r12, r0                @ keep start, r0 becomes the src ptr
    load_varadr r0, ChanBuf
    ldr         r2, [r2]               @ SSumLR is a pointer variable
    add         r0, r12, lsl #2        @ src = &ChanBuf[start]
    add         r2, r12, lsl #3        @ dst = &sum[start * 2]
0:
    vldmia      r0!, {d0-d1}           @ four input samples
    vldmia      r2, {d2-d5}            @ four L/R accumulator pairs
    vmul.s32    d16, d20, d0[0]
    vmul.s32    d17, d20, d0[1]
    vmul.s32    d18, d20, d1[0]
    vmul.s32    d19, d20, d1[1]
    vsra.s32    q1, q8, #14            @ acc += product >> 14
    vsra.s32    q2, q9, #14
    subs        r1, #4
    blt         mc_finish              @ fewer than four samples were left
    vstmia      r2!, {d2-d5}
    bgt         0b
    nop
    bxeq        lr

mc_finish:
    @ r1 is -3, -2 or -1 here: 1, 2 or 3 valid samples in this last group
    vstmia      r2!, {d2}
    cmp         r1, #-2
    vstmiage    r2!, {d3}
    cmp         r1, #-1
    vstmiage    r2!, {d4}
    bx          lr
71
72
/*
 * mix_chan_rvb(int start, int count, int lv, int rv, int *rvb)  -- NEON
 *
 * In:    r0 = start  (sample index)
 *        r1 = count  (number of samples to mix)
 *        r2 = lv, r3 = rv  (left / right multipliers)
 *        [sp] = rvb  (fifth argument, second accumulation buffer)
 * Out:   nothing; for every sample s = ChanBuf[start + i] the values
 *        (s * lv) >> 14 and (s * rv) >> 14 are added to the L/R pair at
 *        index start + i of both the buffer pointed to by the SSumLR
 *        variable and of rvb.
 * Clobb: r0-r3, r12, d0-d5, d16-d24, flags
 *
 * Only caller-saved NEON registers are used: d8-d15 must be preserved
 * across calls under AAPCS, so the rvb accumulators, the products and the
 * multiplier pair live in d16-d24 rather than d6-d14.
 *
 * The loop handles four samples per pass.  It always runs at least once,
 * so the caller is expected to pass count > 0.
 */
FUNCTION(mix_chan_rvb): @ (int start, int count, int lv, int rv, int *rvb)
    vmov.32     d24[0], r2
    vmov.32     d24[1], r3             @ d24 = {lv, rv} multipliers
    load_varadr r2, SSumLR
    mov         r12, r0                @ keep start, r0 becomes the src ptr
    load_varadr r0, ChanBuf
    ldr         r3, [sp]               @ rvb (nothing was pushed, so [sp])
    ldr         r2, [r2]               @ SSumLR is a pointer variable
    add         r0, r12, lsl #2        @ src  = &ChanBuf[start]
    add         r2, r12, lsl #3        @ dst  = &sum[start * 2]
    add         r3, r12, lsl #3        @ dst2 = &rvb[start * 2]
0:
    vldmia      r0!, {d0-d1}           @ four input samples
    vldmia      r2, {d2-d5}            @ four L/R pairs of the main sum
    vldmia      r3, {d16-d19}          @ four L/R pairs of the rvb sum
    vmul.s32    d20, d24, d0[0]
    vmul.s32    d21, d24, d0[1]
    vmul.s32    d22, d24, d1[0]
    vmul.s32    d23, d24, d1[1]
    vsra.s32    q1, q10, #14           @ sum += product >> 14
    vsra.s32    q2, q11, #14
    vsra.s32    q8, q10, #14           @ rvb += product >> 14
    vsra.s32    q9, q11, #14
    subs        r1, #4
    blt         mcr_finish             @ fewer than four samples were left
    vstmia      r2!, {d2-d5}
    vstmia      r3!, {d16-d19}
    bgt         0b
    nop
    bxeq        lr

mcr_finish:
    @ r1 is -3, -2 or -1 here: 1, 2 or 3 valid samples in this last group
    vstmia      r2!, {d2}
    vstmia      r3!, {d16}
    cmp         r1, #-2
    vstmiage    r2!, {d3}
    vstmiage    r3!, {d17}
    cmp         r1, #-1
    vstmiage    r2!, {d4}
    vstmiage    r3!, {d18}
    bx          lr
114
665f33e1 115#elif defined(HAVE_ARMV5)
3a721c1f 116
/*
 * mix_chan(int start, int count, int lv, int rv)           -- ARMv5 variant
 *
 * In:    r0 = start, r1 = count, r2 = lv, r3 = rv
 * Out:   nothing; each sample of ChanBuf from index start onward is
 *        multiplied by lv and rv and the scaled products are added to the
 *        matching L/R pair in the buffer pointed to by the SSumLR variable.
 * Clobb: r0-r3, r12, flags (r4-r8 and lr are saved and restored)
 *
 * Both multipliers are packed into one register (lv low half, rv high
 * half) so smlawb / smlawt can pick them.  smlaw* keeps the top 32 bits of
 * a 32x16 product, i.e. shifts right by 16; pre-shifting both the sample
 * and the multipliers left by one turns that into an effective >> 14.
 *
 * Two samples are handled per pass and the loop always runs at least once.
 */
FUNCTION(mix_chan): @ (int start, int count, int lv, int rv)
    stmdb       sp!, {r4-r8,lr}
    mov         r12, r0                 @ keep start, r0 becomes the src ptr
    orr         r3, r2, r3, lsl #16     @ r3 = lv | (rv << 16)
    load_varadr r2, SSumLR
    load_varadr r0, ChanBuf
    mov         r3, r3, lsl #1          @ packed multipliers << 1
    ldr         r2, [r2]                @ SSumLR is a pointer variable
    add         r0, r0, r12, lsl #2     @ src = &ChanBuf[start]
    add         r2, r2, r12, lsl #3     @ dst = &sum[start * 2]
0:
    ldmia       r0!, {r4,r5}            @ two input samples
    ldmia       r2, {r6-r8,lr}          @ two L/R accumulator pairs
    mov         r4, r4, lsl #1          @ adjust for mul
    mov         r5, r5, lsl #1
    smlawb      r6, r4, r3, r6          @ L0 += s0 * lv
    smlawt      r7, r4, r3, r7          @ R0 += s0 * rv
    smlawb      r8, r5, r3, r8          @ L1 += s1 * lv
    smlawt      lr, r5, r3, lr          @ R1 += s1 * rv
    subs        r1, r1, #2
    blt         mc_finish               @ only one sample was left
    stmia       r2!, {r6-r8,lr}
    bgt         0b
    ldmeqia     sp!, {r4-r8,pc}

mc_finish:
    stmia       r2!, {r6,r7}            @ store just the first pair
    ldmia       sp!, {r4-r8,pc}
145
146
/*
 * mix_chan_rvb(int start, int count, int lv, int rv, int *rvb)  -- ARMv5
 *
 * In:    r0 = start, r1 = count, r2 = lv, r3 = rv
 *        fifth argument rvb on the stack (at [sp] on entry)
 * Out:   nothing; each sample of ChanBuf from index start onward is
 *        multiplied by lv and rv and the scaled products are added to the
 *        matching L/R pair of both the buffer pointed to by the SSumLR
 *        variable and of rvb.
 * Clobb: r0-r3, r12, flags (r4-r8 and lr are saved and restored)
 *
 * Multipliers are packed as lv | (rv << 16) and shifted left by one, as is
 * each sample, so that the >> 16 implied by smlaw* is an effective >> 14.
 *
 * One sample is handled per pass and the loop always runs at least once.
 */
FUNCTION(mix_chan_rvb): @ (int start, int count, int lv, int rv, int *rvb)
    stmfd       sp!, {r4-r8,lr}
    orr         lr, r2, r3, lsl #16     @ lr = lv | (rv << 16)
    lsl         lr, #1                  @ packed multipliers << 1
    load_varadr r2, SSumLR
    @ Six registers were just pushed, so the first stack argument now sits
    @ 6*4 bytes above sp; plain [sp] would read back the saved r4.
    ldr         r3, [sp, #6*4]          @ rvb
    ldr         r2, [r2]                @ SSumLR is a pointer variable
    load_varadr r4, ChanBuf
    add         r2, r2, r0, lsl #3      @ dst  = &sum[start * 2]
    add         r3, r3, r0, lsl #3      @ dst2 = &rvb[start * 2]
    add         r0, r4, r0, lsl #2      @ src  = &ChanBuf[start]
0:
    ldr         r4, [r0], #4            @ next sample
    ldmia       r2, {r6,r7}             @ L/R pair of the main sum
    ldmia       r3, {r8,r12}            @ L/R pair of the rvb sum
    lsl         r4, #1                  @ adjust for mul
    smlawb      r6, r4, lr, r6          @ supposedly takes single cycle?
    smlawt      r7, r4, lr, r7
    smlawb      r8, r4, lr, r8
    smlawt      r12,r4, lr, r12
    subs        r1, #1
    stmia       r2!, {r6,r7}
    stmia       r3!, {r8,r12}
    bgt         0b
    ldmfd       sp!, {r4-r8,pc}
172
c67af2ac 173#endif
3a721c1f 174
b17618c0 175@ vim:filetype=armasm