b17618c0 |
1 | /* |
2 | * (C) Gražvydas "notaz" Ignotas, 2011 |
3 | * |
4 | * This work is licensed under the terms of any of these licenses |
5 | * (at your option): |
6 | * - GNU GPL, version 2 or later. |
7 | * - GNU LGPL, version 2.1 or later. |
8 | * See the COPYING file in the top-level directory. |
9 | */ |
10 | |
665f33e1 |
11 | #include "arm_features.h" |
b17618c0 |
12 | |
4ae83961 |
#ifdef __MACH__
@ Mach-O only: one pointer slot per external buffer.  The Mach-O branch of
@ load_varadr reaches these slots pc-relatively (as ptr_<name>) and loads the
@ real address out of them.
.data
.align 2
ptr_ChanBuf:    .long ESYM(ChanBuf)
ptr_SSumLR:     .long ESYM(SSumLR)
#endif

@ everything below is code
.text
.align 2
22 | |
c67af2ac |
@ load_varadr reg, var
@   Put the address of the external symbol ESYM(var) into reg.
@   The strategy is picked by the preprocessor:
@     1. ARMv7-A, not PIC : movw/movt pair carrying the absolute address.
@     2. ARMv7-A, Mach-O  : movw/movt build the offset from the pc value seen
@                           by the final ldr (its own address + 8) to the
@                           ptr_<var> slot, then the ldr fetches the address
@                           stored in that slot.
@     3. anything else    : literal-pool load (ldr reg, =sym).
@   Only reg is written; no flag-setting instruction is used.
.macro load_varadr reg, var
#if defined(__ARM_ARCH_7A__) && !defined(__PIC__)
    movw        \reg, #:lower16:ESYM(\var)
    movt        \reg, #:upper16:ESYM(\var)
#elif defined(__ARM_ARCH_7A__) && defined(__MACH__)
    movw        \reg, #:lower16:(ptr_\var-(1678f+8))
    movt        \reg, #:upper16:(ptr_\var-(1678f+8))
1678:
    ldr         \reg, [pc, \reg]
#else
    ldr         \reg, =ESYM(\var)
#endif
.endm
36 | |
37 | #ifdef __ARM_NEON__ |
b17618c0 |
38 | |
5c6457c3 |
@ void mix_chan(int start, int count, int lv, int rv)      (NEON variant)
@
@   For every sample i in [start, start + count):
@       SSumLR[2*i]     += (ChanBuf[i] * lv) >> 14
@       SSumLR[2*i + 1] += (ChanBuf[i] * rv) >> 14
@
@   In:    r0 = start, r1 = count, r2 = lv, r3 = rv
@   Clobb: r0-r3, r12, flags, d0-d6, d16-d19
@
@   Only caller-saved NEON registers are used (d0-d7, d16-d31); the
@   callee-saved bank d8-d15 is deliberately left alone so no save/restore
@   is required.
@
@   The main loop handles 4 samples per pass and always loads a full group
@   of 4 samples / 4 accumulator pairs, even when fewer remain; only the
@   remaining 1-3 pairs are stored by the tail.
@   NOTE(review): a count that is zero or negative on entry still stores one
@   pair (tail path) - confirm callers never pass that.
FUNCTION(mix_chan): @ (int start, int count, int lv, int rv)
    vmov.32     d6[0], r2
    vmov.32     d6[1], r3           @ d6 = {lv, rv} multipliers
    mov         r12, r0             @ keep start, r0 is reused for the address
    load_varadr r0, ChanBuf
    load_varadr r2, SSumLR
    add         r0, r12, lsl #2     @ r0 = &ChanBuf[start]
    add         r2, r12, lsl #3     @ r2 = &SSumLR[start * 2]
0:
    vldmia      r0!, {d0-d1}        @ 4 samples
    vldmia      r2, {d2-d5}         @ 4 accumulator pairs
    vmul.s32    d16, d6, d0[0]      @ {s0 * lv, s0 * rv}
    vmul.s32    d17, d6, d0[1]
    vmul.s32    d18, d6, d1[0]
    vmul.s32    d19, d6, d1[1]
    vsra.s32    q1, q8, #14         @ acc += product >> 14
    vsra.s32    q2, q9, #14
    subs        r1, #4
    blt         mc_finish           @ fewer than 4 samples were left
    vstmia      r2!, {d2-d5}
    bgt         0b
    nop
    bxeq        lr

mc_finish:
    @ r1 = remaining - 4 here: -3, -2, -1 mean 1, 2, 3 pairs to store
    vstmia      r2!, {d2}
    cmp         r1, #-2
    vstmiage    r2!, {d3}
    cmp         r1, #-1
    vstmiage    r2!, {d4}
    bx          lr
70 | |
71 | |
3154bfab |
@ void mix_chan_rvb(int start, int count, int lv, int rv, int *rvb)
@                                                           (NEON variant)
@
@   For every sample i in [start, start + count) the scaled sample is added
@   to both SSumLR and rvb:
@       p0 = (ChanBuf[i] * lv) >> 14,  p1 = (ChanBuf[i] * rv) >> 14
@       SSumLR[2*i] += p0;  SSumLR[2*i + 1] += p1
@       rvb[2*i]    += p0;  rvb[2*i + 1]    += p1
@
@   In:    r0 = start, r1 = count, r2 = lv, r3 = rv, [sp] = rvb
@   Clobb: r0-r3, r12, flags, d0-d6, d16-d23
@
@   Only caller-saved NEON registers are used (d0-d7, d16-d31); the
@   callee-saved bank d8-d15 is deliberately left alone so no save/restore
@   is required.  Nothing is pushed, so the fifth argument is still at [sp].
@
@   The main loop handles 4 samples per pass and always loads a full group
@   of 4, even when fewer remain; only the remaining 1-3 pairs are stored by
@   the tail.
@   NOTE(review): a count that is zero or negative on entry still stores one
@   pair to each buffer (tail path) - confirm callers never pass that.
FUNCTION(mix_chan_rvb): @ (int start, int count, int lv, int rv, int *rvb)
    vmov.32     d6[0], r2
    vmov.32     d6[1], r3           @ d6 = {lv, rv} multipliers
    mov         r12, r0             @ keep start, r0 is reused for the address
    load_varadr r0, ChanBuf
    ldr         r3, [sp]            @ rvb (5th argument)
    load_varadr r2, SSumLR
    add         r0, r12, lsl #2     @ r0 = &ChanBuf[start]
    add         r2, r12, lsl #3     @ r2 = &SSumLR[start * 2]
    add         r3, r12, lsl #3     @ r3 = &rvb[start * 2]
0:
    vldmia      r0!, {d0-d1}        @ 4 samples
    vldmia      r2, {d2-d5}         @ 4 SSumLR pairs
    vldmia      r3, {d20-d23}       @ 4 rvb pairs
    vmul.s32    d16, d6, d0[0]      @ {s0 * lv, s0 * rv}
    vmul.s32    d17, d6, d0[1]
    vmul.s32    d18, d6, d1[0]
    vmul.s32    d19, d6, d1[1]
    vsra.s32    q1, q8, #14         @ SSumLR += product >> 14
    vsra.s32    q2, q9, #14
    vsra.s32    q10, q8, #14        @ rvb += product >> 14
    vsra.s32    q11, q9, #14
    subs        r1, #4
    blt         mcr_finish          @ fewer than 4 samples were left
    vstmia      r2!, {d2-d5}
    vstmia      r3!, {d20-d23}
    bgt         0b
    nop
    bxeq        lr

mcr_finish:
    @ r1 = remaining - 4 here: -3, -2, -1 mean 1, 2, 3 pairs to store
    vstmia      r2!, {d2}
    vstmia      r3!, {d20}
    cmp         r1, #-2
    vstmiage    r2!, {d3}
    vstmiage    r3!, {d21}
    cmp         r1, #-1
    vstmiage    r2!, {d4}
    vstmiage    r3!, {d22}
    bx          lr
112 | |
665f33e1 |
113 | #elif defined(HAVE_ARMV5) |
3a721c1f |
114 | |
5c6457c3 |
@ void mix_chan(int start, int count, int lv, int rv)      (ARMv5 variant)
@
@   Adds ChanBuf[start ...] scaled by lv / rv into the interleaved pairs of
@   SSumLR starting at SSumLR[start * 2].  Sample and multipliers are both
@   pre-shifted left by one so that the 16-bit drop done by smlawb/smlawt
@   amounts to a right shift by 14 of the plain product.
@
@   In:    r0 = start, r1 = count, r2 = lv, r3 = rv
@   Uses:  r4-r8 and lr as temporaries (saved and restored), r12 scratch
@
@   Two samples are handled per pass; an odd trailing sample is stored by
@   the mc_finish tail.
@   NOTE(review): the orr packing assumes lv has no bits set above bit 15 -
@   confirm against the callers.
FUNCTION(mix_chan): @ (int start, int count, int lv, int rv)
    stmfd       sp!, {r4-r8,lr}
    mov         r12, r0                 @ keep start, r0 is reused below
    orr         r3, r2, r3, lsl #16     @ r3 = lv | (rv << 16)
    mov         r3, r3, lsl #1          @ packed multipliers << 1
    load_varadr r0, ChanBuf
    load_varadr r2, SSumLR
    add         r0, r0, r12, lsl #2     @ r0 = &ChanBuf[start]
    add         r2, r2, r12, lsl #3     @ r2 = &SSumLR[start * 2]
1:
    ldmia       r0!, {r4,r5}            @ two samples
    ldmia       r2, {r6-r8,lr}          @ two accumulator pairs
    mov         r4, r4, lsl #1          @ adjust for mul
    mov         r5, r5, lsl #1
    smlawb      r6, r4, r3, r6          @ pair 0, lv half
    smlawt      r7, r4, r3, r7          @ pair 0, rv half
    smlawb      r8, r5, r3, r8          @ pair 1, lv half
    smlawt      lr, r5, r3, lr          @ pair 1, rv half
    subs        r1, r1, #2
    blt         mc_finish               @ only one sample was left
    stmia       r2!, {r6-r8,lr}
    bgt         1b
    ldmeqfd     sp!, {r4-r8,pc}         @ count exhausted exactly: return

mc_finish:
    stmia       r2!, {r6,r7}            @ store just the first pair
    ldmfd       sp!, {r4-r8,pc}
142 | |
143 | |
3154bfab |
@ void mix_chan_rvb(int start, int count, int lv, int rv, int *rvb)
@                                                           (ARMv5 variant)
@
@   Adds ChanBuf[start ...] scaled by lv / rv into the interleaved pairs of
@   both SSumLR and rvb, starting at pair index start.  Sample and
@   multipliers are both pre-shifted left by one so that the 16-bit drop done
@   by smlawb/smlawt amounts to a right shift by 14 of the plain product.
@
@   In:    r0 = start, r1 = count, r2 = lv, r3 = rv, stack = rvb
@   Uses:  r4, r6-r8 and lr as temporaries (r4-r8, lr saved and restored),
@          r12 scratch
@
@   One sample is handled per pass; the loop body runs before count is
@   tested, so at least one sample is always processed.
@   NOTE(review): the orr packing assumes lv has no bits set above bit 15 -
@   confirm against the callers.
FUNCTION(mix_chan_rvb): @ (int start, int count, int lv, int rv, int *rvb)
    stmfd       sp!, {r4-r8,lr}
    orr         lr, r2, r3, lsl #16     @ lr = lv | (rv << 16)
    lsl         lr, #1                  @ packed multipliers << 1
    @ the fifth argument was at [sp] on entry; six words have been pushed
    @ since, so it now lives 6*4 bytes above sp
    ldr         r3, [sp, #6*4]          @ rvb
    load_varadr r2, SSumLR
    load_varadr r4, ChanBuf
    add         r2, r2, r0, lsl #3      @ r2 = &SSumLR[start * 2]
    add         r3, r3, r0, lsl #3      @ r3 = &rvb[start * 2]
    add         r0, r4, r0, lsl #2      @ r0 = &ChanBuf[start]
0:
    ldr         r4, [r0], #4            @ next sample
    ldmia       r2, {r6,r7}             @ SSumLR pair
    ldmia       r3, {r8,r12}            @ rvb pair
    lsl         r4, #1                  @ adjust for mul
    smlawb      r6, r4, lr, r6          @ supposedly takes single cycle?
    smlawt      r7, r4, lr, r7
    smlawb      r8, r4, lr, r8
    smlawt      r12,r4, lr, r12
    subs        r1, #1
    stmia       r2!, {r6,r7}
    stmia       r3!, {r8,r12}
    bgt         0b
    ldmfd       sp!, {r4-r8,pc}
168 | |
c67af2ac |
169 | #endif |
3a721c1f |
170 | |
b17618c0 |
171 | @ vim:filetype=armasm |