4f265db7 |
1 | @ vim:filetype=armasm |
2 | |
.global memcpy32 @ int *dest, int *src, int count

@ Word-wise copy: moves r2 *words* (not bytes) from src (r1) to dest (r0).
@ Both pointers must be word-aligned. The main loop transfers 4 words per
@ iteration; the remaining 0-3 words are handled by testing the low bits
@ of the now-negative counter. Clobbers r3, r12; preserves r4/lr via stack.
memcpy32:
        stmfd   sp!, {r4,lr}            @ r4 and lr serve as extra transfer regs

        subs    r2, r2, #4              @ pre-decrement; MI -> fewer than 4 words total
        bmi     mcp32_fin

mcp32_loop:
        ldmia   r1!, {r3,r4,r12,lr}     @ load 4 words from src
        subs    r2, r2, #4
        stmia   r0!, {r3,r4,r12,lr}     @ store 4 words to dest
        bpl     mcp32_loop

mcp32_fin:
        tst     r2, #3                  @ remainder = low 2 bits of negative count
        ldmeqfd sp!, {r4,pc}            @ no remainder: restore and return directly
        tst     r2, #1
        ldrne   r3, [r1], #4            @ copy the odd word, if any
        strne   r3, [r0], #4

mcp32_no_unal1:
        tst     r2, #2                  @ a pair of words left?
        ldmneia r1!, {r3,r12}
        ldmfd   sp!, {r4,lr}            @ restore; LDM does not alter the NE flag
        stmneia r0!, {r3,r12}
        bx      lr
30 | |
31 | |
32 | |
.global memset32 @ int *dest, int c, int count

@ Word-wise fill: writes the 32-bit value c (r1) into r2 words at dest (r0).
@ dest must be word-aligned. The value is replicated into r3/r12/lr so the
@ main loop can store 4 words per STM; the 0-3 word tail is selected from
@ the low bits of the negative counter. Clobbers r3, r12.
memset32:
        stmfd   sp!, {lr}

        mov     r3, r1                  @ r3 = c; also needed by the 2-word tail below
        subs    r2, r2, #4              @ MI -> fewer than 4 words total
        bmi     mst32_fin

        mov     r12,r1                  @ replicate fill value for 4-word stores
        mov     lr, r1

mst32_loop:
        subs    r2, r2, #4
        stmia   r0!, {r1,r3,r12,lr}     @ 4 words per iteration
        bpl     mst32_loop

mst32_fin:
        tst     r2, #1                  @ odd word left?
        strne   r1, [r0], #4

        tst     r2, #2                  @ two words left?
        stmneia r0!, {r1,r3}

        ldmfd   sp!, {lr}
        bx      lr
59 | |
60 | |
61 | |
@ this assumes src is word aligned
.global mix_16h_to_32 @ int *dest, short *src, int count

@ Mix r2 consecutive 16-bit samples from src (r1) into 32-bit accumulators
@ at dest (r0), at half volume: each 16-bit sample is effectively >>1 before
@ adding (asr #17 of a sample sitting in a word's high half, or after
@ lsl #16 for the low half). Main loop handles 4 samples per iteration,
@ then a 2-sample and a 1-sample tail. Clobbers r3, r12.
mix_16h_to_32:
        stmfd   sp!, {r4-r6,lr}
/*
        tst     r1, #2
        beq     m16_32_mo_unalw
        ldrsh   r4, [r1], #2
        ldr     r3, [r0]
        sub     r2, r2, #1
        add     r3, r3, r4, asr #1
        str     r3, [r0], #4
*/
m16_32_mo_unalw:
        subs    r2, r2, #4              @ MI -> fewer than 4 samples total
        bmi     m16_32_end

m16_32_loop:
        ldmia   r0, {r3-r6}             @ 4 output accumulators
        ldmia   r1!,{r12,lr}            @ 2 words = 4 packed 16-bit samples
        subs    r2, r2, #4
        add     r4, r4, r12,asr #17     @ we use half volume
        mov     r12,r12,lsl #16         @ bring low halfword up for sign-correct asr
        add     r3, r3, r12,asr #17
        add     r6, r6, lr, asr #17
        mov     lr, lr, lsl #16
        add     r5, r5, lr, asr #17
        stmia   r0!,{r3-r6}
        bpl     m16_32_loop

m16_32_end:
        tst     r2, #2                  @ 2 samples (one src word) left?
        beq     m16_32_no_unal2
        ldr     r5, [r1], #4
        ldmia   r0, {r3,r4}
        mov     r12,r5, lsl #16
        add     r3, r3, r12,asr #17
        add     r4, r4, r5, asr #17
        stmia   r0!,{r3,r4}

m16_32_no_unal2:
        tst     r2, #1                  @ one last sample?
        ldmeqfd sp!, {r4-r6,pc}
        ldrsh   r4, [r1], #2
        ldr     r3, [r0]
        add     r3, r3, r4, asr #1      @ half volume
        str     r3, [r0], #4

        ldmfd   sp!, {r4-r6,lr}
        bx      lr
113 | |
114 | |
115 | |
.global mix_16h_to_32_s1 @ int *dest, short *src, int count

@ Same mixing as mix_16h_to_32 (half volume into 32-bit accumulators), but
@ the source is read with a stride: each LDR fetches one word (2 samples)
@ and then advances r1 by 8 bytes, i.e. every other sample pair of src is
@ skipped — presumably src holds interleaved streams; TODO confirm against
@ callers. Assumes src is word aligned. Clobbers r3, r12.
mix_16h_to_32_s1:
        stmfd   sp!, {r4-r6,lr}

        subs    r2, r2, #4              @ MI -> fewer than 4 output samples total
        bmi     m16_32_s1_end

m16_32_s1_loop:
        ldmia   r0, {r3-r6}             @ 4 output accumulators
        ldr     r12,[r1], #8            @ 2 samples, then skip 4 bytes
        ldr     lr, [r1], #8
        subs    r2, r2, #4
        add     r4, r4, r12,asr #17
        mov     r12,r12,lsl #16         @ bring low halfword up for sign-correct asr
        add     r3, r3, r12,asr #17     @ we use half volume
        add     r6, r6, lr, asr #17
        mov     lr, lr, lsl #16
        add     r5, r5, lr, asr #17
        stmia   r0!,{r3-r6}
        bpl     m16_32_s1_loop

m16_32_s1_end:
        tst     r2, #2                  @ 2 samples left?
        beq     m16_32_s1_no_unal2
        ldr     r5, [r1], #8            @ keep the same source stride
        ldmia   r0, {r3,r4}
        mov     r12,r5, lsl #16
        add     r3, r3, r12,asr #17
        add     r4, r4, r5, asr #17
        stmia   r0!,{r3,r4}

m16_32_s1_no_unal2:
        tst     r2, #1                  @ one last sample?
        ldmeqfd sp!, {r4-r6,pc}
        ldrsh   r4, [r1], #2
        ldr     r3, [r0]
        add     r3, r3, r4, asr #1      @ half volume
        str     r3, [r0], #4

        ldmfd   sp!, {r4-r6,lr}
        bx      lr
158 | |
159 | |
160 | |
.global mix_16h_to_32_s2 @ int *dest, short *src, int count

@ Same mixing as mix_16h_to_32 (half volume into 32-bit accumulators), but
@ with a larger source stride: each LDR fetches one word (2 samples) and
@ advances r1 by 16 bytes, skipping 3 sample pairs — presumably a more
@ sparsely interleaved src layout; TODO confirm against callers.
@ Assumes src is word aligned. Clobbers r3, r12.
mix_16h_to_32_s2:
        stmfd   sp!, {r4-r6,lr}

        subs    r2, r2, #4              @ MI -> fewer than 4 output samples total
        bmi     m16_32_s2_end

m16_32_s2_loop:
        ldmia   r0, {r3-r6}             @ 4 output accumulators
        ldr     r12,[r1], #16           @ 2 samples, then skip 12 bytes
        ldr     lr, [r1], #16
        subs    r2, r2, #4
        add     r4, r4, r12,asr #17
        mov     r12,r12,lsl #16         @ bring low halfword up for sign-correct asr
        add     r3, r3, r12,asr #17     @ we use half volume
        add     r6, r6, lr, asr #17
        mov     lr, lr, lsl #16
        add     r5, r5, lr, asr #17
        stmia   r0!,{r3-r6}
        bpl     m16_32_s2_loop

m16_32_s2_end:
        tst     r2, #2                  @ 2 samples left?
        beq     m16_32_s2_no_unal2
        ldr     r5, [r1], #16           @ keep the same source stride
        ldmia   r0, {r3,r4}
        mov     r12,r5, lsl #16
        add     r3, r3, r12,asr #17
        add     r4, r4, r5, asr #17
        stmia   r0!,{r3,r4}

m16_32_s2_no_unal2:
        tst     r2, #1                  @ one last sample?
        ldmeqfd sp!, {r4-r6,pc}
        ldrsh   r4, [r1], #2
        ldr     r3, [r0]
        add     r3, r3, r4, asr #1      @ half volume
        str     r3, [r0], #4

        ldmfd   sp!, {r4-r6,lr}
        bx      lr
203 | |
204 | |
205 | |
@ limit
@ reg=int_sample, lr=1, r3=tmp, kills flags
@ Clamp \reg to the signed 16-bit range. For an in-range value,
@ \reg asr #15 is 0 or -1, so r3 = lr + that = 1 or 0, and BICS #1 leaves
@ zero (EQ -> no change). On overflow r3 is nonzero: NE sets 0x8000, and
@ for *positive* overflow PL then subtracts 1 to give 0x7fff. Negative
@ overflow leaves 0x8000, which is -32768 when only the low 16 bits are
@ used (callers store the result with strh / packed halfwords).
.macro Limit reg
        add     r3, lr, \reg, asr #15
        bics    r3, r3, #1              @ in non-overflow conditions r3 is 0 or 1
        movne   \reg, #0x8000
        subpl   \reg, \reg, #1          @ positive overflow: 0x8000 - 1 = 0x7fff
.endm
214 | |
215 | |
@ limit and shift up by 16
@ reg=int_sample, lr=1, r3=tmp, kills flags
@ Same clamp trick as Limit, but the result ends up in the *high* halfword
@ of \reg (low halfword zero) so two results can be packed into one word:
@ in-range -> \reg << 16; positive overflow -> 0x7fff0000;
@ negative overflow -> 0x80000000.
.macro Limitsh reg
@       movs    r4, r3, asr #16
@       cmnne   r4, #1
@       beq     c32_16_no_overflow
@       tst     r4, r4
@       mov     r3, #0x8000
@       subpl   r3, r3, #1

        add     r3, lr, \reg, asr #15
        bics    r3, r3, #1              @ in non-overflow conditions r3 is 0 or 1
        moveq   \reg, \reg, lsl #16     @ in range: just shift into high halfword
        movne   \reg, #0x80000000
        subpl   \reg, \reg, #0x00010000 @ positive overflow: -> 0x7fff0000
.endm
232 | |
233 | |
@ mix 32bit audio (with 16bits really used, upper bits indicate overflow) with normal 16 bit audio with left channel only
@ warning: this function assumes dest is word aligned
.global mix_32_to_16l_stereo @ short *dest, int *src, int count

@ dest: interleaved 16-bit stereo; src: 32-bit samples (L,R,L,R...).
@ For each stereo pair, the existing *left* dest sample (low halfword of
@ the dest word) is added to both the L and R src samples, the results are
@ saturated and packed back as two 16-bit samples per word.
@ r2 = count of stereo pairs; doubled up front to count src words.
mix_32_to_16l_stereo:
        stmfd   sp!, {r4-r8,lr}

        mov     lr, #1                  @ constant 1 required by Limitsh

        mov     r2, r2, lsl #1          @ r2 = number of 32-bit src samples
        subs    r2, r2, #4              @ MI -> fewer than 4 src samples (2 pairs)
        bmi     m32_16l_st_end

m32_16l_st_loop:
        ldmia   r0, {r8,r12}            @ 2 dest words = 2 stereo pairs
        ldmia   r1!, {r4-r7}            @ 4 src samples (2 L/R pairs)
        mov     r8, r8, lsl #16         @ isolate left sample (low halfword), sign in place
        mov     r12,r12,lsl #16
        add     r4, r4, r8, asr #16     @ add dest left sample to src L...
        add     r5, r5, r8, asr #16     @ ...and to src R of the same pair
        add     r6, r6, r12,asr #16
        add     r7, r7, r12,asr #16
        Limitsh r4
        Limitsh r5
        Limitsh r6
        Limitsh r7
        subs    r2, r2, #4
        orr     r4, r5, r4, lsr #16     @ pack: r4 low halfword = L, high = R
        orr     r5, r7, r6, lsr #16
        stmia   r0!, {r4,r5}
        bpl     m32_16l_st_loop

m32_16l_st_end:
        @ check for remaining bytes to convert
        tst     r2, #2                  @ one stereo pair left?
        beq     m32_16l_st_no_unal2
        ldrsh   r6, [r0]                @ existing left dest sample, sign-extended
        ldmia   r1!,{r4,r5}
        add     r4, r4, r6
        add     r5, r5, r6
        Limitsh r4
        Limitsh r5
        orr     r4, r5, r4, lsr #16
        str     r4, [r0], #4

m32_16l_st_no_unal2:
        ldmfd   sp!, {r4-r8,lr}
        bx      lr
282 | |
283 | |
@ mix 32bit audio (with 16bits really used, upper bits indicate overflow) with normal 16 bit audio (for mono sound)
.global mix_32_to_16_mono @ short *dest, int *src, int count

@ dest: mono 16-bit samples; src: mono 32-bit samples. Each dest sample is
@ added to the corresponding src sample, saturated to 16 bits and stored
@ back. Handles a halfword-aligned dest with scalar pre/post fixups; the
@ main loop processes 4 samples (2 packed dest words) per iteration.
mix_32_to_16_mono:
        stmfd   sp!, {r4-r8,lr}

        mov     lr, #1                  @ constant 1 required by Limit/Limitsh

        @ check if dest is word aligned
        tst     r0, #2
        beq     m32_16_mo_no_unalw
        ldrsh   r5, [r0]                @ mix one sample to reach word alignment
        ldr     r4, [r1], #4
        sub     r2, r2, #1
        add     r4, r4, r5
        Limit   r4
        strh    r4, [r0], #2

m32_16_mo_no_unalw:
        subs    r2, r2, #4              @ MI -> fewer than 4 samples left
        bmi     m32_16_mo_end

m32_16_mo_loop:
        ldmia   r0, {r8,r12}            @ 2 dest words = 4 packed 16-bit samples
        ldmia   r1!, {r4-r7}            @ 4 src samples
        add     r5, r5, r8, asr #16     @ sample 1 = high halfword of r8
        mov     r8, r8, lsl #16
        add     r4, r4, r8, asr #16     @ sample 0 = low halfword of r8
        add     r7, r7, r12,asr #16     @ sample 3
        mov     r12,r12,lsl #16
        add     r6, r6, r12,asr #16     @ sample 2
        Limitsh r4
        Limitsh r5
        Limitsh r6
        Limitsh r7
        subs    r2, r2, #4
        orr     r4, r5, r4, lsr #16     @ repack two samples per word
        orr     r5, r7, r6, lsr #16
        stmia   r0!, {r4,r5}
        bpl     m32_16_mo_loop

m32_16_mo_end:
        @ check for remaining bytes to convert
        tst     r2, #2                  @ two samples left?
        beq     m32_16_mo_no_unal2
        ldr     r6, [r0]                @ one dest word = 2 packed samples
        ldmia   r1!,{r4,r5}
        add     r5, r5, r6, asr #16
        mov     r6, r6, lsl #16
        add     r4, r4, r6, asr #16
        Limitsh r4
        Limitsh r5
        orr     r4, r5, r4, lsr #16
        str     r4, [r0], #4

m32_16_mo_no_unal2:
        tst     r2, #1                  @ one last sample?
        ldmeqfd sp!, {r4-r8,pc}
        ldrsh   r5, [r0]
        ldr     r4, [r1], #4
        add     r4, r4, r5
        Limit   r4
        strh    r4, [r0], #2

        ldmfd   sp!, {r4-r8,lr}
        bx      lr
350 | |