b17618c0 |
1 | /* |
2 | * (C) Gražvydas "notaz" Ignotas, 2011 |
3 | * |
4 | * This work is licensed under the terms of any of these licenses |
5 | * (at your option): |
6 | * - GNU GPL, version 2 or later. |
7 | * - GNU LGPL, version 2.1 or later. |
8 | * See the COPYING file in the top-level directory. |
9 | */ |
10 | |
665f33e1 |
11 | #include "arm_features.h" |
b17618c0 |
12 | |
4ae83961 |
#ifdef __MACH__
@ Mach-O only: one word per C symbol, holding that symbol address.
@ The ARMv7-A/Mach-O branch of load_varadr reads these words
@ PC-relatively through the ptr_<name> labels.
    .data
    .p2align 2                          @ 4-byte alignment for the words below
ptr_ChanBuf:
    .word   _ChanBuf
ptr_SSumLR:
    .word   _SSumLR
ptr_sRVBStart:
    .word   _sRVBStart
#endif
20 | |
b17618c0 |
21 | .text |
22 | .align 2 |
23 | |
c67af2ac |
@ load_varadr reg, var
@   Puts the address of the C symbol var into reg.
@   Only reg is written; condition flags are not touched.
@   The strategy is picked at preprocessing time:
@     - ARMv7-A, non-PIC : movw/movt pair carrying the absolute address
@     - ARMv7-A, Mach-O  : PC-relative load of the ptr_<var> word that the
@                          __MACH__ .data table in this file provides
@     - anything else    : literal-pool load (ldr reg, =sym)
.macro load_varadr reg var
#if defined(__ARM_ARCH_7A__) && !defined(__PIC__)
    movw    \reg, #:lower16:ESYM(\var)
    movt    \reg, #:upper16:ESYM(\var)
#elif defined(__ARM_ARCH_7A__) && defined(__MACH__)
    @ The ldr below uses pc as the base register, which is an ARM-state
    @ form; there pc reads as the address of the ldr plus 8, so the
    @ displacement has to be taken relative to (1678f+8).
    movw    \reg, #:lower16:(ptr_\var-(1678f+8))
    movt    \reg, #:upper16:(ptr_\var-(1678f+8))
1678:
    ldr     \reg, [pc, \reg]
#else
    ldr     \reg, =ESYM(\var)
#endif
.endm
37 | |
38 | #ifdef __ARM_NEON__ |
b17618c0 |
39 | |
5c6457c3 |
@ mix_chan(int start, int count, int lv, int rv)            [NEON build]
@
@ For every sample index i in [start, start+count):
@     SSumLR[2*i]   += (ChanBuf[i] * lv) >> 14
@     SSumLR[2*i+1] += (ChanBuf[i] * rv) >> 14
@ using 32-bit multiplies and arithmetic right shifts.
@
@ In:    r0 = start, r1 = count, r2 = lv, r3 = rv
@ Uses:  r0 = &ChanBuf[i], r1 = samples left, r2 = &SSumLR[2*i], r12 = start
@        d0-d5 (data), d16-d19 (products), d30 (multipliers)
@ Only caller-saved NEON registers are used; d8-d15 are callee-saved under
@ AAPCS and are deliberately left untouched.
@
@ Notes:
@  - Four samples are loaded per iteration before the count is checked, so
@    up to 3 ChanBuf words / 6 SSumLR words past the requested range are
@    read (never written).
@  - The loop body always runs once: with count <= 0 one stereo pair is
@    still updated.
FUNCTION(mix_chan): @ (int start, int count, int lv, int rv)
    vmov.32     d30[0], r2
    vmov.32     d30[1], r3              @ d30 = {lv, rv} multipliers
    mov         r12, r0
    load_varadr r0, ChanBuf
    load_varadr r2, SSumLR
    add         r0, r12, lsl #2         @ r0 = &ChanBuf[start]
    add         r2, r12, lsl #3         @ r2 = &SSumLR[start * 2]
0:
    vldmia      r0!, {d0-d1}            @ four input samples
    vldmia      r2, {d2-d5}             @ four {L,R} accumulator pairs
    vmul.s32    d16, d30, d0[0]         @ {s0*lv, s0*rv}
    vmul.s32    d17, d30, d0[1]
    vmul.s32    d18, d30, d1[0]
    vmul.s32    d19, d30, d1[1]
    vsra.s32    q1, q8, #14             @ acc += product >> 14
    vsra.s32    q2, q9, #14
    subs        r1, #4
    blt         mc_finish               @ 1..3 samples were left: partial store
    vstmia      r2!, {d2-d5}
    bgt         0b
    nop
    bxeq        lr                      @ count was a multiple of 4

mc_finish:
    @ r1 is (samples left - 4), i.e. -3, -2 or -1 for 1, 2 or 3 samples
    vstmia      r2!, {d2}
    cmp         r1, #-2
    vstmiage    r2!, {d3}               @ at least 2 samples were left
    cmp         r1, #-1
    vstmiage    r2!, {d4}               @ 3 samples were left
    bx          lr
71 | |
72 | |
5c6457c3 |
@ mix_chan_rvb(int start, int count, int lv, int rv)        [NEON build]
@
@ Same accumulation as mix_chan, applied to two destinations: SSumLR and
@ the buffer whose address is stored in the sRVBStart variable. For every
@ sample index i in [start, start+count), with dst being each buffer:
@     dst[2*i]   += (ChanBuf[i] * lv) >> 14
@     dst[2*i+1] += (ChanBuf[i] * rv) >> 14
@
@ In:    r0 = start, r1 = count, r2 = lv, r3 = rv
@ Uses:  r0 = &ChanBuf[i], r1 = samples left, r2 = &SSumLR[2*i],
@        r3 = reverb buffer + 8*i, r12 = start
@        d0-d5 (data), d16-d19 (products), d20-d23 (reverb accumulators),
@        d30 (multipliers)
@ Only caller-saved NEON registers are used; d8-d15 are callee-saved under
@ AAPCS and are deliberately left untouched.
@
@ Notes:
@  - Four samples are loaded per iteration before the count is checked, so
@    words slightly past the requested range are read (never written).
@  - The loop body always runs once: with count <= 0 one stereo pair is
@    still updated in both buffers.
FUNCTION(mix_chan_rvb): @ (int start, int count, int lv, int rv)
    vmov.32     d30[0], r2
    vmov.32     d30[1], r3              @ d30 = {lv, rv} multipliers
    mov         r12, r0
    load_varadr r0, ChanBuf
    load_varadr r3, sRVBStart
    load_varadr r2, SSumLR
    ldr         r3, [r3]                @ r3 = value of sRVBStart (a pointer)
    add         r0, r12, lsl #2         @ r0 = &ChanBuf[start]
    add         r2, r12, lsl #3         @ r2 = &SSumLR[start * 2]
    add         r3, r12, lsl #3         @ r3 = reverb buffer + start * 8
0:
    vldmia      r0!, {d0-d1}            @ four input samples
    vldmia      r2, {d2-d5}             @ four {L,R} pairs from SSumLR
    vldmia      r3, {d20-d23}           @ four {L,R} pairs from reverb buffer
    vmul.s32    d16, d30, d0[0]         @ {s0*lv, s0*rv}
    vmul.s32    d17, d30, d0[1]
    vmul.s32    d18, d30, d1[0]
    vmul.s32    d19, d30, d1[1]
    vsra.s32    q1, q8, #14             @ SSumLR acc += product >> 14
    vsra.s32    q2, q9, #14
    vsra.s32    q10, q8, #14            @ reverb acc += product >> 14
    vsra.s32    q11, q9, #14
    subs        r1, #4
    blt         mcr_finish              @ 1..3 samples were left: partial store
    vstmia      r2!, {d2-d5}
    vstmia      r3!, {d20-d23}
    bgt         0b
    nop
    bxeq        lr                      @ count was a multiple of 4

mcr_finish:
    @ r1 is (samples left - 4), i.e. -3, -2 or -1 for 1, 2 or 3 samples
    vstmia      r2!, {d2}
    vstmia      r3!, {d20}
    cmp         r1, #-2
    vstmiage    r2!, {d3}               @ at least 2 samples were left
    vstmiage    r3!, {d21}
    cmp         r1, #-1
    vstmiage    r2!, {d4}               @ 3 samples were left
    vstmiage    r3!, {d22}
    bx          lr
114 | |
665f33e1 |
115 | #elif defined(HAVE_ARMV5) |
3a721c1f |
116 | |
5c6457c3 |
@ mix_chan(int start, int count, int lv, int rv)            [ARMv5 build]
@
@ Adds the volume-scaled samples ChanBuf[start ...] into the interleaved
@ accumulators SSumLR[2*start ...], two samples per iteration.
@
@ In:    r0 = start, r1 = count, r2 = lv, r3 = rv
@ Uses:  r0 = &ChanBuf[i], r1 = samples left, r2 = &SSumLR[2*i],
@        r3 = (lv | rv << 16) << 1, r12 = start
@        r4-r8 and lr are saved on entry and restored on return.
@ The two volumes share one register as halfwords, so lv has to fit in the
@ low halfword for rv to survive the orr.
@ The loop body always runs once, and two samples are loaded before the
@ count is checked.
FUNCTION(mix_chan): @ (int start, int count, int lv, int rv)
    stmfd       sp!, {r4-r8,lr}
    orr         r3, r2, r3, lsl #16     @ low half = lv, high half = rv
    mov         r3, r3, lsl #1          @ both multipliers doubled
    mov         r12, r0
    load_varadr r0, ChanBuf
    load_varadr r2, SSumLR
    add         r0, r0, r12, lsl #2     @ r0 = &ChanBuf[start]
    add         r2, r2, r12, lsl #3     @ r2 = &SSumLR[start * 2]
1:
    ldmia       r0!, {r4,r5}            @ two input samples
    ldmia       r2, {r6-r8,lr}          @ L0, R0, L1, R1 accumulators
    mov         r4, r4, lsl #1          @ doubled so that the >>16 of
    mov         r5, r5, lsl #1          @ smlaw* amounts to >>14 overall
    smlawb      r6, r4, r3, r6          @ L0 += s0 * lv
    smlawt      r7, r4, r3, r7          @ R0 += s0 * rv
    smlawb      r8, r5, r3, r8          @ L1 += s1 * lv
    smlawt      lr, r5, r3, lr          @ R1 += s1 * rv
    subs        r1, r1, #2
    blt         2f                      @ only one sample was left
    stmia       r2!, {r6-r8,lr}
    bgt         1b
    ldmeqfd     sp!, {r4-r8,pc}         @ even count: finished

2:
    stmia       r2!, {r6,r7}            @ odd tail: store first pair only
    ldmfd       sp!, {r4-r8,pc}
144 | |
145 | |
5c6457c3 |
@ mix_chan_rvb(int start, int count, int lv, int rv)        [ARMv5 build]
@
@ Adds the volume-scaled samples ChanBuf[start ...] into two interleaved
@ accumulator buffers, one sample per iteration: SSumLR and the buffer
@ whose address is stored in the sRVBStart variable.
@
@ In:    r0 = start, r1 = count, r2 = lv, r3 = rv
@ Uses:  r0 = &ChanBuf[i], r1 = samples left, r2 = &SSumLR[2*i],
@        r3 = reverb buffer + 8*i, lr = (lv | rv << 16) << 1,
@        r4 = sample, r6/r7 = SSumLR pair, r8/r12 = reverb pair
@        r4-r8 and lr are saved on entry and restored on return.
@ The two volumes share one register as halfwords, so lv has to fit in the
@ low halfword for rv to survive the orr.
@ The count is tested after the stores, so one sample is always processed.
FUNCTION(mix_chan_rvb): @ (int start, int count, int lv, int rv)
    stmfd       sp!, {r4-r8,lr}
    orr         lr, r2, r3, lsl #16     @ low half = lv, high half = rv
    mov         lr, lr, lsl #1          @ both multipliers doubled
    load_varadr r3, sRVBStart
    load_varadr r2, SSumLR
    load_varadr r4, ChanBuf
    ldr         r3, [r3]                @ r3 = value of sRVBStart (a pointer)
    add         r2, r2, r0, lsl #3      @ r2 = &SSumLR[start * 2]
    add         r3, r3, r0, lsl #3      @ r3 = reverb buffer + start * 8
    add         r0, r4, r0, lsl #2      @ r0 = &ChanBuf[start]
1:
    ldr         r4, [r0], #4            @ next input sample
    ldmia       r2, {r6,r7}             @ SSumLR  L, R
    ldmia       r3, {r8,r12}            @ reverb  L, R
    mov         r4, r4, lsl #1          @ doubled: smlaw* >>16 becomes >>14
    smlawb      r6, r4, lr, r6          @ SSumLR L += s * lv
    smlawt      r7, r4, lr, r7          @ SSumLR R += s * rv
    smlawb      r8, r4, lr, r8          @ reverb L += s * lv
    smlawt      r12, r4, lr, r12        @ reverb R += s * rv
    subs        r1, r1, #1
    stmia       r2!, {r6,r7}
    stmia       r3!, {r8,r12}
    bgt         1b
    ldmfd       sp!, {r4-r8,pc}
171 | |
c67af2ac |
172 | #endif |
3a721c1f |
173 | |
b17618c0 |
174 | @ vim:filetype=armasm |