b17618c0 |
1 | /* |
2 | * (C) Gražvydas "notaz" Ignotas, 2011 |
3 | * |
4 | * This work is licensed under the terms of any of these licenses |
5 | * (at your option): |
6 | * - GNU GPL, version 2 or later. |
7 | * - GNU LGPL, version 2.1 or later. |
8 | * See the COPYING file in the top-level directory. |
9 | */ |
10 | |
665f33e1 |
11 | #include "arm_features.h" |
b17618c0 |
12 | |
4ae83961 |
#ifdef __MACH__
@ Mach-O only: one 32-bit slot per C symbol, holding that symbol address.
@ The Mach-O branch of load_varadr reaches these slots pc-relatively and
@ expects the slot for symbol X to be named ptr_X.
    .data
    .p2align 2
ptr_ChanBuf:
    .word       ESYM(ChanBuf)
ptr_SSumLR:
    .word       ESYM(SSumLR)
#endif

    .text
    .p2align 2
22 | |
c67af2ac |
@ load_varadr dst, sym
@ Load the address of the C symbol ESYM(sym) into core register dst.
@   - ARMv7-A without PIC: absolute address built by a movw/movt pair.
@   - ARMv7-A on Mach-O:   movw/movt build the distance from the ldr below
@                          to the ptr_<sym> slot, and the ldr fetches the
@                          address stored in that slot. The +8 bias is the
@                          amount by which pc reads ahead of the ldr.
@   - anything else:       literal pool load.
@ Only dst is written.
.macro load_varadr dst sym
#if defined(__ARM_ARCH_7A__) && !defined(__PIC__)
    movw        \dst, #:lower16:ESYM(\sym)
    movt        \dst, #:upper16:ESYM(\sym)
#elif defined(__ARM_ARCH_7A__) && defined(__MACH__)
    movw        \dst, #:lower16:(ptr_\sym-(1678f+8))
    movt        \dst, #:upper16:(ptr_\sym-(1678f+8))
1678:
    ldr         \dst, [pc, \dst]
#else
    ldr         \dst, =ESYM(\sym)
#endif
.endm
36 | |
37 | #ifdef __ARM_NEON__ |
b17618c0 |
38 | |
5c6457c3 |
@ mix_chan(int start, int count, int lv, int rv)              [NEON variant]
@
@ For each of count samples starting at index start:
@     out[2*i + 0] += (ChanBuf[i] * lv) >> 14
@     out[2*i + 1] += (ChanBuf[i] * rv) >> 14
@ where out is the buffer that the pointer variable SSumLR refers to.
@
@ In:    r0 = start, r1 = count, r2 = lv, r3 = rv
@ Clobb: r0-r3, r12, d0-d5, d16, d18-d21, flags
@
@ Only caller-saved NEON registers are used (d0-d5 and d16 upwards);
@ d8-d15 must be preserved across calls under the AAPCS and are not touched.
@
@ NOTE: samples are handled in groups of four. The loads always fetch a
@ whole group even when fewer samples remain, and at least one sample is
@ mixed even if count is not positive.
FUNCTION(mix_chan): @ (int start, int count, int lv, int rv)
    vmov.32     d16[0], r2
    vmov.32     d16[1], r3             @ d16 = {lv, rv} multipliers
    load_varadr r2, SSumLR
    mov         r12, r0                @ keep start, r0 is reused below
    load_varadr r0, ChanBuf
    ldr         r2, [r2]               @ SSumLR holds a pointer, fetch it
    add         r0, r12, lsl #2        @ r0 = ChanBuf + start * 4
    add         r2, r12, lsl #3        @ r2 = out + start * 8
0:
    vldmia      r0!, {d0-d1}           @ four input samples
    vldmia      r2, {d2-d5}            @ four accumulator pairs
    vmul.s32    d18, d16, d0[0]        @ {s0 * lv, s0 * rv}
    vmul.s32    d19, d16, d0[1]
    vmul.s32    d20, d16, d1[0]
    vmul.s32    d21, d16, d1[1]
    vsra.s32    q1, q9, #14            @ pairs 0-1 += products >> 14
    vsra.s32    q2, q10, #14           @ pairs 2-3 += products >> 14
    subs        r1, #4
    blt         mc_finish              @ fewer than four were left
    vstmia      r2!, {d2-d5}           @ flags from subs are still valid
    bgt         0b
    nop                                @ kept from the original sequence
    bxeq        lr                     @ count reached exactly zero

mc_finish:
    @ r1 = remaining - 4 here, i.e. -3, -2 or -1 for 1, 2 or 3 samples
    vstmia      r2!, {d2}
    cmp         r1, #-2
    vstmiage    r2!, {d3}              @ two or three were left
    cmp         r1, #-1
    vstmiage    r2!, {d4}              @ three were left
    bx          lr
71 | |
72 | |
3154bfab |
@ mix_chan_rvb(int start, int count, int lv, int rv, int *rvb) [NEON variant]
@
@ Same mixing as mix_chan, but every product is accumulated twice: into the
@ buffer that the pointer variable SSumLR refers to, and into rvb, both
@ indexed as pairs of 32-bit values per sample.
@
@ In:    r0 = start, r1 = count, r2 = lv, r3 = rv, [sp] = rvb
@ Clobb: r0-r3, r12, d0-d5, d16, d18-d25, flags
@
@ Only caller-saved NEON registers are used (d0-d5 and d16 upwards);
@ d8-d15 must be preserved across calls under the AAPCS and are not touched.
@
@ NOTE: samples are handled in groups of four. The loads always fetch a
@ whole group even when fewer samples remain, and at least one sample is
@ mixed even if count is not positive.
FUNCTION(mix_chan_rvb): @ (int start, int count, int lv, int rv, int *rvb)
    vmov.32     d16[0], r2
    vmov.32     d16[1], r3             @ d16 = {lv, rv} multipliers
    load_varadr r2, SSumLR
    mov         r12, r0                @ keep start, r0 is reused below
    load_varadr r0, ChanBuf
    ldr         r3, [sp]               @ rvb (nothing has been pushed)
    ldr         r2, [r2]               @ SSumLR holds a pointer, fetch it
    add         r0, r12, lsl #2        @ r0 = ChanBuf + start * 4
    add         r2, r12, lsl #3        @ r2 = out + start * 8
    add         r3, r12, lsl #3        @ r3 = rvb + start * 8
0:
    vldmia      r0!, {d0-d1}           @ four input samples
    vldmia      r2, {d2-d5}            @ four main accumulator pairs
    vldmia      r3, {d22-d25}          @ four reverb accumulator pairs
    vmul.s32    d18, d16, d0[0]        @ {s0 * lv, s0 * rv}
    vmul.s32    d19, d16, d0[1]
    vmul.s32    d20, d16, d1[0]
    vmul.s32    d21, d16, d1[1]
    vsra.s32    q1, q9, #14            @ main pairs 0-1
    vsra.s32    q2, q10, #14           @ main pairs 2-3
    vsra.s32    q11, q9, #14           @ reverb pairs 0-1
    vsra.s32    q12, q10, #14          @ reverb pairs 2-3
    subs        r1, #4
    blt         mcr_finish             @ fewer than four were left
    vstmia      r2!, {d2-d5}           @ flags from subs are still valid
    vstmia      r3!, {d22-d25}
    bgt         0b
    nop                                @ kept from the original sequence
    bxeq        lr                     @ count reached exactly zero

mcr_finish:
    @ r1 = remaining - 4 here, i.e. -3, -2 or -1 for 1, 2 or 3 samples
    vstmia      r2!, {d2}
    vstmia      r3!, {d22}
    cmp         r1, #-2
    vstmiage    r2!, {d3}              @ two or three were left
    vstmiage    r3!, {d23}
    cmp         r1, #-1
    vstmiage    r2!, {d4}              @ three were left
    vstmiage    r3!, {d24}
    bx          lr
114 | |
665f33e1 |
115 | #elif defined(HAVE_ARMV5) |
3a721c1f |
116 | |
5c6457c3 |
@ mix_chan(int start, int count, int lv, int rv)            [ARMv5E variant]
@
@ For each of count samples starting at index start, adds
@ (ChanBuf[i] * lv) >> 14 and (ChanBuf[i] * rv) >> 14 to the pair of 32-bit
@ values at index i of the buffer that the pointer variable SSumLR refers to.
@
@ Both multipliers are packed into one register (lv low half, rv high half)
@ for smlawb/smlawt. Sample and multipliers are each shifted left by one so
@ that the >> 16 built into smlaw gives a net >> 14.
@ NOTE(review): lv is OR-ed in unmasked, so this presumably relies on lv
@ being a small non-negative value -- confirm against the callers.
@
@ In:    r0 = start, r1 = count, r2 = lv, r3 = rv
@ Clobb: r0-r3, r12, flags (r4-r8 are saved and restored)
@
@ NOTE: two samples are loaded per iteration even when only one remains,
@ and at least one sample is mixed even if count is not positive.
FUNCTION(mix_chan): @ (int start, int count, int lv, int rv)
    stmfd       sp!, {r4-r8,lr}
    orr         r3, r2, r3, lsl #16    @ r3 = lv | (rv << 16)
    mov         r3, r3, lsl #1         @ packed multipliers << 1
    load_varadr r2, SSumLR
    mov         r12, r0                @ keep start, r0 is reused below
    load_varadr r0, ChanBuf
    ldr         r2, [r2]               @ SSumLR holds a pointer, fetch it
    add         r0, r0, r12, lsl #2    @ r0 = ChanBuf + start * 4
    add         r2, r2, r12, lsl #3    @ r2 = out + start * 8
1:
    ldmia       r0!, {r4,r5}           @ two input samples
    ldmia       r2, {r6-r8,lr}         @ two accumulator pairs
    mov         r4, r4, lsl #1         @ adjust for mul
    mov         r5, r5, lsl #1
    smlawb      r6, r4, r3, r6         @ pair 0, first value  (lv half)
    smlawt      r7, r4, r3, r7         @ pair 0, second value (rv half)
    smlawb      r8, r5, r3, r8         @ pair 1, first value
    smlawt      lr, r5, r3, lr         @ pair 1, second value
    subs        r1, r1, #2
    blt         mc_finish              @ only one sample was left
    stmia       r2!, {r6-r8,lr}        @ flags from subs are still valid
    bgt         1b
    ldmeqfd     sp!, {r4-r8,pc}        @ count reached exactly zero

mc_finish:
    stmia       r2!, {r6,r7}           @ store just the first pair
    ldmfd       sp!, {r4-r8,pc}
145 | |
146 | |
3154bfab |
@ mix_chan_rvb(int start, int count, int lv, int rv, int *rvb) [ARMv5E variant]
@
@ For each of count samples starting at index start, adds
@ (ChanBuf[i] * lv) >> 14 and (ChanBuf[i] * rv) >> 14 to the pair of 32-bit
@ values at index i of both the buffer that the pointer variable SSumLR
@ refers to and of rvb.
@
@ Both multipliers are packed into lr (lv low half, rv high half) for
@ smlawb/smlawt. Sample and multipliers are each shifted left by one so
@ that the >> 16 built into smlaw gives a net >> 14.
@ NOTE(review): lv is OR-ed in unmasked, so this presumably relies on lv
@ being a small non-negative value -- confirm against the callers.
@
@ In:    r0 = start, r1 = count, r2 = lv, r3 = rv, fifth argument = rvb
@ Clobb: r0-r3, r12, flags (r4-r8 are saved and restored)
@
@ NOTE: the loop body runs before count is tested, so at least one sample
@ is mixed even if count is not positive.
FUNCTION(mix_chan_rvb): @ (int start, int count, int lv, int rv, int *rvb)
    stmfd       sp!, {r4-r8,lr}        @ six words pushed
    orr         lr, r2, r3, lsl #16    @ lr = lv | (rv << 16)
    lsl         lr, #1                 @ packed multipliers << 1
    load_varadr r2, SSumLR
    ldr         r3, [sp, #6*4]         @ rvb: stack argument sits above the
                                       @ six registers saved on entry
    ldr         r2, [r2]               @ SSumLR holds a pointer, fetch it
    load_varadr r4, ChanBuf
    add         r2, r2, r0, lsl #3     @ r2 = out + start * 8
    add         r3, r3, r0, lsl #3     @ r3 = rvb + start * 8
    add         r0, r4, r0, lsl #2     @ r0 = ChanBuf + start * 4
0:
    ldr         r4, [r0], #4           @ one input sample
    ldmia       r2, {r6,r7}            @ main accumulator pair
    ldmia       r3, {r8,r12}           @ reverb accumulator pair
    lsl         r4, #1                 @ adjust for mul
    smlawb      r6, r4, lr, r6         @ supposedly takes single cycle?
    smlawt      r7, r4, lr, r7
    smlawb      r8, r4, lr, r8
    smlawt      r12,r4, lr, r12
    subs        r1, #1
    stmia       r2!, {r6,r7}           @ stores leave the flags alone
    stmia       r3!, {r8,r12}
    bgt         0b
    ldmfd       sp!, {r4-r8,pc}
172 | |
c67af2ac |
173 | #endif |
3a721c1f |
174 | |
b17618c0 |
175 | @ vim:filetype=armasm |