platform ps2, handle audio similar to psp
[picodrive.git] / pico / sound / mix_arm.S
CommitLineData
cff531af 1/*
2 * Generic routines for mixing audio samples
3 * (C) notaz, 2007,2008
7bf552b5 4 * (C) irixxxx, 2019,2020 added filtering
cff531af 5 *
6 * This work is licensed under the terms of MAME license.
7 * See COPYING file in the top-level directory.
8 */
4f265db7 9
.text
.align 4

@ void mix_16h_to_32(int *dest, short *src, int count)
@
@ Adds count 16-bit samples from src onto the 32-bit values in dest.
@ Each sample is added at 5/8 volume (x/2 + x/8).
@ src is read with word loads, so this assumes src is word aligned.
@
@ In:     r0 = dest, r1 = src, r2 = count
@ Temps:  r3-r6 = dest values, r12/lr = packed source words
.global mix_16h_to_32 @ int *dest, short *src, int count

mix_16h_to_32:
    stmfd   sp!, {r4-r6,lr}
    subs    r2, r2, #4                  @ fewer than 4 samples -> tail only
    bmi     .Lm16_32_tail

.Lm16_32_quad:                          @ 4 samples per pass
    ldmia   r1!, {r12,lr}               @ two source words = 4 samples
    ldmia   r0, {r3-r6}
    add     r4, r4, r12, asr #17        @ upper halfword of r12: x/2
    add     r4, r4, r12, asr #19        @                      + x/8
    add     r6, r6, lr, asr #17         @ upper halfword of lr
    add     r6, r6, lr, asr #19
    mov     r12, r12, lsl #16           @ lower halfword to the top, so asr sign-extends it
    mov     lr, lr, lsl #16
    add     r3, r3, r12, asr #17
    add     r3, r3, r12, asr #19
    add     r5, r5, lr, asr #17
    add     r5, r5, lr, asr #19
    subs    r2, r2, #4
    stmia   r0!, {r3-r6}
    bpl     .Lm16_32_quad

.Lm16_32_tail:
    @ r2 is now in -4..-1; its two low bits are the number of samples left
    tst     r2, #2
    beq     .Lm16_32_last
    ldmia   r0, {r3,r4}
    ldr     r5, [r1], #4                @ one source word = 2 samples
    mov     r12, r5, lsl #16
    add     r4, r4, r5, asr #17
    add     r4, r4, r5, asr #19
    add     r3, r3, r12, asr #17
    add     r3, r3, r12, asr #19
    stmia   r0!, {r3,r4}

.Lm16_32_last:
    tst     r2, #1
    ldmeqfd sp!, {r4-r6,pc}             @ nothing left: return
    ldrsh   r4, [r1], #2                @ final single sample
    ldr     r3, [r0]
    add     r3, r3, r4, asr #1
    add     r3, r3, r4, asr #3
    str     r3, [r0], #4

    ldmfd   sp!, {r4-r6,lr}
    bx      lr
71
72
73
@ void mix_16h_to_32_s1(int *dest, short *src, int count)
@
@ Adds count 16-bit samples from src onto the 32-bit values in dest at
@ 5/8 volume (x/2 + x/8).  After each word (2 samples) read from src the
@ pointer advances by 8 bytes, so every second source word is skipped.
@
@ In:     r0 = dest, r1 = src, r2 = count
@ Temps:  r3-r6 = dest values, r12/lr = packed source words
.global mix_16h_to_32_s1 @ int *dest, short *src, int count

mix_16h_to_32_s1:
    stmfd   sp!, {r4-r6,lr}
    subs    r2, r2, #4                  @ fewer than 4 samples -> tail only
    bmi     .Lm16_32_s1_tail

.Lm16_32_s1_quad:                       @ 4 samples per pass
    ldr     r12, [r1], #8               @ read a word, step over the next one
    ldr     lr, [r1], #8
    ldmia   r0, {r3-r6}
    add     r4, r4, r12, asr #17        @ upper halfword of r12: x/2
    add     r4, r4, r12, asr #19        @                      + x/8
    add     r6, r6, lr, asr #17         @ upper halfword of lr
    add     r6, r6, lr, asr #19
    mov     r12, r12, lsl #16           @ lower halfword to the top, so asr sign-extends it
    mov     lr, lr, lsl #16
    add     r3, r3, r12, asr #17
    add     r3, r3, r12, asr #19
    add     r5, r5, lr, asr #17
    add     r5, r5, lr, asr #19
    subs    r2, r2, #4
    stmia   r0!, {r3-r6}
    bpl     .Lm16_32_s1_quad

.Lm16_32_s1_tail:
    @ r2 is now in -4..-1; its two low bits are the number of samples left
    tst     r2, #2
    beq     .Lm16_32_s1_last
    ldmia   r0, {r3,r4}
    ldr     r5, [r1], #8
    mov     r12, r5, lsl #16
    add     r4, r4, r5, asr #17
    add     r4, r4, r5, asr #19
    add     r3, r3, r12, asr #17
    add     r3, r3, r12, asr #19
    stmia   r0!, {r3,r4}

.Lm16_32_s1_last:
    tst     r2, #1
    ldmeqfd sp!, {r4-r6,pc}             @ nothing left: return
    ldrsh   r4, [r1], #2                @ final single sample
    ldr     r3, [r0]
    add     r3, r3, r4, asr #1
    add     r3, r3, r4, asr #3
    str     r3, [r0], #4

    ldmfd   sp!, {r4-r6,lr}
    bx      lr
123
124
125
@ void mix_16h_to_32_s2(int *dest, short *src, int count)
@
@ Adds count 16-bit samples from src onto the 32-bit values in dest at
@ 5/8 volume (x/2 + x/8).  After each word (2 samples) read from src the
@ pointer advances by 16 bytes, so three of every four source words are
@ skipped.
@
@ In:     r0 = dest, r1 = src, r2 = count
@ Temps:  r3-r6 = dest values, r12/lr = packed source words
.global mix_16h_to_32_s2 @ int *dest, short *src, int count

mix_16h_to_32_s2:
    stmfd   sp!, {r4-r6,lr}
    subs    r2, r2, #4                  @ fewer than 4 samples -> tail only
    bmi     .Lm16_32_s2_tail

.Lm16_32_s2_quad:                       @ 4 samples per pass
    ldr     r12, [r1], #16              @ read a word, step over the next three
    ldr     lr, [r1], #16
    ldmia   r0, {r3-r6}
    add     r4, r4, r12, asr #17        @ upper halfword of r12: x/2
    add     r4, r4, r12, asr #19        @                      + x/8
    add     r6, r6, lr, asr #17         @ upper halfword of lr
    add     r6, r6, lr, asr #19
    mov     r12, r12, lsl #16           @ lower halfword to the top, so asr sign-extends it
    mov     lr, lr, lsl #16
    add     r3, r3, r12, asr #17
    add     r3, r3, r12, asr #19
    add     r5, r5, lr, asr #17
    add     r5, r5, lr, asr #19
    subs    r2, r2, #4
    stmia   r0!, {r3-r6}
    bpl     .Lm16_32_s2_quad

.Lm16_32_s2_tail:
    @ r2 is now in -4..-1; its two low bits are the number of samples left
    tst     r2, #2
    beq     .Lm16_32_s2_last
    ldmia   r0, {r3,r4}
    ldr     r5, [r1], #16
    mov     r12, r5, lsl #16
    add     r4, r4, r5, asr #17
    add     r4, r4, r5, asr #19
    add     r3, r3, r12, asr #17
    add     r3, r3, r12, asr #19
    stmia   r0!, {r3,r4}

.Lm16_32_s2_last:
    tst     r2, #1
    ldmeqfd sp!, {r4-r6,pc}             @ nothing left: return
    ldrsh   r4, [r1], #2                @ final single sample
    ldr     r3, [r0]
    add     r3, r3, r4, asr #1
    add     r3, r3, r4, asr #3
    str     r3, [r0], #4

    ldmfd   sp!, {r4-r6,lr}
    bx      lr
175
176
177
@ void mix_16h_to_32_resample_stereo(int *dest, short *src, int count, int fac16)
@
@ Adds count source frames (one 32-bit word = two 16-bit samples each)
@ onto pairs of 32-bit values in dest at 5/8 volume (x/2 + x/8).
@ The source frame index is the upper 16 bits of a position counter that
@ starts at 0 and grows by fac16 after every frame.
@
@ In:     r0 = dest, r1 = src, r2 = count, r3 = fac16
@ Temps:  r4 = position counter, r9 = frame index,
@         r5-r8 = dest values, r12/lr = source frames
.global mix_16h_to_32_resample_stereo @ int *dest, short *src, int count, int fac16

mix_16h_to_32_resample_stereo:
    stmfd   sp!, {r4-r9,lr}

    mov     r4, #0                      @ position = 0
    subs    r2, r2, #2                  @ fewer than 2 frames -> tail only
    bmi     .Lm16_32_rss_tail

.Lm16_32_rss_pair:                      @ 2 frames per pass
    mov     r9, r4, lsr #16
    ldr     r12, [r1, r9, lsl #2]       @ first frame
    add     r4, r4, r3
    mov     r9, r4, lsr #16
    ldr     lr, [r1, r9, lsl #2]        @ second frame
    add     r4, r4, r3
    ldmia   r0, {r5-r8}
    add     r6, r6, r12, asr #17        @ upper halfword: x/2
    add     r6, r6, r12, asr #19        @               + x/8
    add     r8, r8, lr, asr #17
    add     r8, r8, lr, asr #19
    mov     r12, r12, lsl #16           @ lower halfword to the top, so asr sign-extends it
    mov     lr, lr, lsl #16
    add     r5, r5, r12, asr #17
    add     r5, r5, r12, asr #19
    add     r7, r7, lr, asr #17
    add     r7, r7, lr, asr #19
    subs    r2, r2, #2
    stmia   r0!, {r5-r8}
    bpl     .Lm16_32_rss_pair

.Lm16_32_rss_tail:
    tst     r2, #1                      @ one frame left over?
    ldmeqfd sp!, {r4-r9,pc}
    mov     r9, r4, lsr #16
    ldr     lr, [r1, r9, lsl #2]
    ldmia   r0, {r5,r6}
    mov     r12, lr, lsl #16
    add     r6, r6, lr, asr #17
    add     r6, r6, lr, asr #19
    add     r5, r5, r12, asr #17
    add     r5, r5, r12, asr #19
    stmia   r0!, {r5,r6}

    ldmfd   sp!, {r4-r9,lr}
    bx      lr
224
225
226
@ void mix_16h_to_32_resample_mono(int *dest, short *src, int count, int fac16)
@
@ For each of count source frames (one 32-bit word = two 16-bit samples),
@ adds both samples of the frame onto a single 32-bit value in dest.
@ Each sample is added at 5/16 volume (x/4 + x/16), 5/8 for the pair.
@ The source frame index is the upper 16 bits of a position counter that
@ starts at 0 and grows by fac16 after every frame.
@
@ In:     r0 = dest, r1 = src, r2 = count, r3 = fac16
@ Temps:  r4 = position counter, r9 = frame index,
@         r5/r6 = dest values, r12/lr = source frames
.global mix_16h_to_32_resample_mono @ int *dest, short *src, int count, int fac16

mix_16h_to_32_resample_mono:
    stmfd   sp!, {r4-r6,r9,lr}

    mov     r4, #0                      @ position = 0
    subs    r2, r2, #2                  @ fewer than 2 frames -> tail only
    bmi     .Lm16_32_rsm_tail

.Lm16_32_rsm_pair:                      @ 2 frames per pass
    mov     r9, r4, lsr #16
    ldr     r12, [r1, r9, lsl #2]       @ first frame
    add     r4, r4, r3
    mov     r9, r4, lsr #16
    ldr     lr, [r1, r9, lsl #2]        @ second frame
    add     r4, r4, r3
    ldmia   r0, {r5-r6}
    add     r5, r5, r12, asr #18        @ upper halfword: x/4
    add     r5, r5, r12, asr #20        @               + x/16
    add     r6, r6, lr, asr #18
    add     r6, r6, lr, asr #20
    mov     r12, r12, lsl #16           @ lower halfword to the top, so asr sign-extends it
    mov     lr, lr, lsl #16
    add     r5, r5, r12, asr #18
    add     r5, r5, r12, asr #20
    add     r6, r6, lr, asr #18
    add     r6, r6, lr, asr #20
    subs    r2, r2, #2
    stmia   r0!, {r5-r6}
    bpl     .Lm16_32_rsm_pair

.Lm16_32_rsm_tail:
    tst     r2, #1                      @ one frame left over?
    ldmeqfd sp!, {r4-r6,r9,pc}
    mov     r9, r4, lsr #16
    ldr     lr, [r1, r9, lsl #2]
    ldr     r5, [r0]
    mov     r12, lr, lsl #16
    add     r5, r5, lr, asr #18
    add     r5, r5, lr, asr #20
    add     r5, r5, r12, asr #18
    add     r5, r5, r12, asr #20
    str     r5, [r0]

    ldmfd   sp!, {r4-r6,r9,lr}
    bx      lr
273
274
275
@ limit
@ reg=int_sample, r12=1, r8=tmp, kills flags
@ The sample is first reduced by 1/8, then clamped: the low halfword of
@ reg ends up as 0x7fff on positive overflow, 0x8000 on negative overflow,
@ and is left alone when the reduced sample fits in 16 bits.
.macro Limit reg
    sub     \reg, \reg, \reg, asr #3    @ reduce audio lvl some to avoid clipping
    add     r8, r12, \reg, asr #15
    bics    r8, r8, #1                  @ in non-overflow conditions r8 is 0 or 1
    movne   \reg, #0x8000               @ overflow: start from 0x8000
    @ bics leaves N clear when its result is 0, so without the next line the
    @ subpl below would also run for in-range samples and subtract 1 from them
    @ (turning -32768 into +32767 once stored as a halfword).
    cmpeq   r8, #1                      @ in range (r8 == 0): 0 - 1 sets N
    subpl   \reg, \reg, #1              @ positive overflow only: 0x8000 - 1 = 0x7fff
.endm
285
@ limit and shift up by 16
@ reg=int_sample, r12=1, r8=tmp, kills flags
@ The sample is first reduced by 1/8, then clamped and left in the upper
@ halfword of reg with the lower halfword zero: 0x7fff0000 on positive
@ overflow, 0x80000000 on negative overflow, otherwise sample << 16.
.macro Limitsh reg
    sub     \reg, \reg, \reg, asr #3    @ reduce audio lvl some to avoid clipping
    add     r8, r12,\reg, asr #15
    bics    r8, r8, #1                  @ in non-overflow conditions r8 is 0 or 1
    moveq   \reg, \reg, lsl #16         @ in range: just shift up
    movne   \reg, #0x80000000           @ overflow: start from 0x80000000
    @ bics leaves N clear when its result is 0, so without the next line the
    @ subpl below would also run for in-range samples and subtract 0x10000
    @ from them (turning 0x80000000 into 0x7fff0000).
    cmpeq   r8, #1                      @ in range (r8 == 0): 0 - 1 sets N
    subpl   \reg, \reg, #0x00010000     @ positive overflow only: -> 0x7fff0000
.endm
296
30969671 297
@ filter out DC offset
@ in=int_sample (max 20 bit), y=filter memory, r8=tmp
@ y follows the input in 20.12 fixpoint, moving 1/4096 of the remaining
@ distance per sample (alpha = 1-1/4096); its integer part is then
@ subtracted from the sample.
.macro DCfilt in y
    rsb     r8, \y, \in, lsl #12        @ r8 = (in << 12) - y
    add     \y, \y, r8, asr #12         @ y += r8 >> 12
    sub     \in, \in, \y, asr #12       @ in -= y >> 12
.endm
305
@ lowpass filter
@ in=int_sample (max 20 bit), y=filter memory, r12=alpha, r8=tmp
@ y is kept in 20.12 fixpoint.  The integer difference (in - (y >> 12)) is
@ multiplied by r12 and accumulated straight into y, so r12 acts as a
@ 12 bit fraction; the sample is replaced by the integer part of y.
.macro LPfilt in y
    sub     r8, \in, \y, asr #12        @ r8 = in - (y >> 12)
    mla     \y, r8, r12, \y             @ y += r8 * alpha
    mov     \in, \y, asr #12            @ in = y >> 12
.endm
4f265db7 315
30969671 316
@ void mix_32_to_16_stereo(short *dest, int *src, int count)
@
@ mix 32bit audio (with 16bits really used, upper bits indicate overflow)
@ with the 16 bit audio already in dest: each sum is run through the
@ lowpass and DC filters, limited to 16 bits and written back to dest.
@ count is doubled on entry, so it counts pairs of samples.
@ warning: this function assumes dest is word aligned
@
@ Register use while mixing:
@   r3, lr   = lowpass filter memory for the 1st / 2nd sample of a pair
@   r10, r11 = DC filter memory for the 1st / 2nd sample of a pair
@   [sp]     = lowpass alpha, reloaded into r12 before each LPfilt run
@   r12      = alpha for LPfilt, then 1 for Limitsh
.global mix_32_to_16_stereo @ short *dest, int *src, int count

mix_32_to_16_stereo:
    stmfd   sp!, {r4-r8,r10-r11,lr}

    @ Load the filter state and push alpha BEFORE the short-count branch:
    @ the tail and exit code below use these registers, store them back to
    @ filter and drop the alpha word from the stack unconditionally.
    ldr     r12, =filter
    ldr     r8, [r12], #4               @ alpha
    ldmia   r12, {r3,r10-r11,lr}        @ filter memory
    str     r8, [sp, #-4]!              @ keep alpha on the stack

    mov     r2, r2, lsl #1              @ pairs -> samples
    subs    r2, r2, #4
    bmi     m32_16_st_end

m32_16_st_loop:                         @ 4 samples per pass
    ldmia   r0, {r8,r12}                @ 4 packed 16 bit dest samples
    ldmia   r1!, {r4-r7}
    add     r5, r5, r8, asr #16         @ upper halfwords
    add     r7, r7, r12,asr #16
    mov     r8, r8, lsl #16             @ lower halfwords, sign-extended via lsl/asr
    mov     r12,r12,lsl #16
    add     r4, r4, r8, asr #16
    add     r6, r6, r12,asr #16
    ldr     r12,[sp]                    @ r12 = alpha
    LPfilt  r4, r3
    LPfilt  r5, lr
    LPfilt  r6, r3
    LPfilt  r7, lr
    DCfilt  r4, r10
    DCfilt  r5, r11
    DCfilt  r6, r10
    DCfilt  r7, r11
    mov     r12,#1                      @ Limitsh expects r12 = 1
    Limitsh r4
    Limitsh r5
    Limitsh r6
    Limitsh r7
    subs    r2, r2, #4
    orr     r4, r5, r4, lsr #16         @ repack two samples per word
    orr     r5, r7, r6, lsr #16
    stmia   r0!, {r4,r5}
    bpl     m32_16_st_loop

m32_16_st_end:
    @ check for remaining bytes to convert
    tst     r2, #2
    beq     m32_16_st_no_unal2
    ldr     r6, [r0]
    ldmia   r1!,{r4,r5}
    add     r5, r5, r6, asr #16
    mov     r6, r6, lsl #16
    add     r4, r4, r6, asr #16
    ldr     r12,[sp]                    @ r12 = alpha
    LPfilt  r4, r3
    LPfilt  r5, lr
    DCfilt  r4, r10
    DCfilt  r5, r11
    mov     r12,#1
    Limitsh r4
    Limitsh r5
    orr     r4, r5, r4, lsr #16
    str     r4, [r0], #4

m32_16_st_no_unal2:
    ldr     r12, =filter
    add     r12,r12, #4
    stmia   r12, {r3,r10-r11,lr}        @ save filter memory for the next call
    add     sp, sp, #4                  @ drop alpha
    ldmfd   sp!, {r4-r8,r10-r11,lr}
    bx      lr
389
390
@ void mix_32_to_16_mono(short *dest, int *src, int count)
@
@ mix 32bit audio (with 16bits really used, upper bits indicate overflow)
@ with the 16 bit audio already in dest (for mono sound): each sum is run
@ through the lowpass and DC filters, limited to 16 bits and written back.
@ dest may be halfword aligned; a leading sample is then handled on its own.
@
@ Register use while mixing:
@   r11  = lowpass filter memory, r10 = DC filter memory
@   [sp] = lowpass alpha, reloaded into r12 before each LPfilt run
@   r12  = alpha for LPfilt, then 1 for Limit/Limitsh
.global mix_32_to_16_mono @ short *dest, int *src, int count

mix_32_to_16_mono:
    stmfd   sp!, {r4-r8,r10-r11,lr}

    ldr     r12, =filter
    ldr     r8, [r12], #4               @ alpha
    ldmia   r12, {r10-r11}              @ filter memory
    str     r8, [sp, #-4]!              @ keep alpha on the stack

    @ check if dest is word aligned
    tst     r0, #2
    beq     .Lm32_16_mo_aligned
    ldr     r4, [r1], #4                @ leading sample brings dest to a word boundary
    ldrsh   r5, [r0]
    sub     r2, r2, #1
    ldr     r12, [sp]                   @ r12 = alpha
    add     r4, r4, r5
    LPfilt  r4, r11
    DCfilt  r4, r10
    mov     r12, #1                     @ Limit expects r12 = 1
    Limit   r4
    strh    r4, [r0], #2

.Lm32_16_mo_aligned:
    subs    r2, r2, #4
    bmi     .Lm32_16_mo_tail

.Lm32_16_mo_quad:                       @ 4 samples per pass
    ldmia   r1!, {r4-r7}
    ldmia   r0, {r8,r12}                @ 4 packed 16 bit dest samples
    add     r5, r5, r8, asr #16         @ upper halfwords
    add     r7, r7, r12, asr #16
    mov     r8, r8, lsl #16             @ lower halfwords, sign-extended via lsl/asr
    mov     r12, r12, lsl #16
    add     r4, r4, r8, asr #16
    add     r6, r6, r12, asr #16
    ldr     r12, [sp]                   @ r12 = alpha
    @ the two filters keep separate memories, so each sample goes through
    @ lowpass then DC before the next one; both still see samples in order
    LPfilt  r4, r11
    DCfilt  r4, r10
    LPfilt  r5, r11
    DCfilt  r5, r10
    LPfilt  r6, r11
    DCfilt  r6, r10
    LPfilt  r7, r11
    DCfilt  r7, r10
    mov     r12, #1                     @ Limitsh expects r12 = 1
    Limitsh r4
    Limitsh r5
    Limitsh r6
    Limitsh r7
    subs    r2, r2, #4
    orr     r4, r5, r4, lsr #16         @ repack two samples per word
    orr     r5, r7, r6, lsr #16
    stmia   r0!, {r4,r5}
    bpl     .Lm32_16_mo_quad

.Lm32_16_mo_tail:
    @ check for remaining bytes to convert
    tst     r2, #2
    beq     .Lm32_16_mo_last
    ldmia   r1!, {r4,r5}
    ldr     r6, [r0]
    add     r5, r5, r6, asr #16
    mov     r6, r6, lsl #16
    add     r4, r4, r6, asr #16
    ldr     r12, [sp]                   @ r12 = alpha
    LPfilt  r4, r11
    DCfilt  r4, r10
    LPfilt  r5, r11
    DCfilt  r5, r10
    mov     r12, #1
    Limitsh r4
    Limitsh r5
    orr     r4, r5, r4, lsr #16
    str     r4, [r0], #4

.Lm32_16_mo_last:
    tst     r2, #1
    beq     .Lm32_16_mo_done
    ldr     r4, [r1], #4                @ final single sample
    ldrsh   r5, [r0]
    ldr     r12, [sp]                   @ r12 = alpha
    add     r4, r4, r5
    LPfilt  r4, r11
    DCfilt  r4, r10
    mov     r12, #1
    Limit   r4
    strh    r4, [r0], #2

.Lm32_16_mo_done:
    ldr     r12, =filter
    add     r12, r12, #4
    stmia   r12, {r10-r11}              @ save filter memory for the next call
    add     sp, sp, #4                  @ drop alpha
    ldmfd   sp!, {r4-r8,r10-r11,lr}
    bx      lr
489
4a32f01f 490
ae7830aa 491#ifdef __GP2X__
4a32f01f 492
.data
.align 4

@ int mix_32_to_16_level
@ Right-shift amount that mix_32_to_16_stereo_lvl applies to every mixed
@ sample before filtering.  Starts out as 0 (no shift).
.global mix_32_to_16_level
mix_32_to_16_level:
    .word   0
499
.text
.align 4

@ void mix_32_to_16_stereo_lvl(short *dest, int *src, int count)
@
@ same as mix_32_to_16_stereo, but with additional shift: every mixed
@ sample is shifted right by the value of mix_32_to_16_level before it is
@ filtered and limited.
@ dest is read and written with word accesses.
@
@ Register use while mixing:
@   r9       = shift amount (mix_32_to_16_level)
@   r3, lr   = lowpass filter memory for the 1st / 2nd sample of a pair
@   r10, r11 = DC filter memory for the 1st / 2nd sample of a pair
@   [sp]     = lowpass alpha, reloaded into r12 before each LPfilt run
@   r12      = alpha for LPfilt, then 1 for Limitsh
.global mix_32_to_16_stereo_lvl @ short *dest, int *src, int count

mix_32_to_16_stereo_lvl:
    stmfd   sp!, {r4-r11,lr}

    ldr     r9, =mix_32_to_16_level
    ldr     r9, [r9]
    @ (a former "mov lr, #1" here was dead: lr is loaded just below)
    ldr     r12, =filter
    ldr     r8, [r12], #4               @ alpha
    ldmia   r12, {r3,r10-r11,lr}        @ filter memory
    str     r8, [sp, #-4]!              @ keep alpha on the stack

    mov     r2, r2, lsl #1              @ pairs -> samples
    subs    r2, r2, #4
    bmi     m32_16_st_l_end

m32_16_st_l_loop:                       @ 4 samples per pass
    ldmia   r0, {r8,r12}                @ 4 packed 16 bit dest samples
    ldmia   r1!, {r4-r7}
    add     r5, r5, r8, asr #16         @ upper halfwords
    add     r7, r7, r12,asr #16
    mov     r8, r8, lsl #16             @ lower halfwords, sign-extended via lsl/asr
    mov     r12,r12,lsl #16
    add     r4, r4, r8, asr #16
    add     r6, r6, r12,asr #16
    mov     r4, r4, asr r9              @ apply the level shift
    mov     r5, r5, asr r9
    mov     r6, r6, asr r9
    mov     r7, r7, asr r9
    ldr     r12,[sp]                    @ r12 = alpha
    LPfilt  r4, r3
    LPfilt  r5, lr
    LPfilt  r6, r3
    LPfilt  r7, lr
    DCfilt  r4, r10
    DCfilt  r5, r11
    DCfilt  r6, r10
    DCfilt  r7, r11
    mov     r12,#1                      @ Limitsh expects r12 = 1
    Limitsh r4
    Limitsh r5
    Limitsh r6
    Limitsh r7
    subs    r2, r2, #4
    orr     r4, r5, r4, lsr #16         @ repack two samples per word
    orr     r5, r7, r6, lsr #16
    stmia   r0!, {r4,r5}
    bpl     m32_16_st_l_loop

m32_16_st_l_end:
    @ check for remaining bytes to convert
    tst     r2, #2
    beq     m32_16_st_l_no_unal2
    ldr     r6, [r0]
    ldmia   r1!,{r4,r5}
    add     r5, r5, r6, asr #16
    mov     r6, r6, lsl #16
    add     r4, r4, r6, asr #16
    mov     r4, r4, asr r9
    mov     r5, r5, asr r9
    ldr     r12,[sp]                    @ r12 = alpha
    LPfilt  r4, r3
    LPfilt  r5, lr
    DCfilt  r4, r10
    DCfilt  r5, r11
    mov     r12,#1
    Limitsh r4
    Limitsh r5
    orr     r4, r5, r4, lsr #16
    str     r4, [r0], #4

m32_16_st_l_no_unal2:
    ldr     r12, =filter
    add     r12,r12, #4
    stmia   r12, {r3,r10-r11,lr}        @ save filter memory for the next call
    add     sp, sp, #4                  @ drop alpha
    ldmfd   sp!, {r4-r11,lr}
    bx      lr
583
8ac9ab7f 584#endif /* __GP2X__ */
585
@ void mix_reset(int alpha_q16)
@
@ Stores (0x10000 - alpha_q16) >> 4 as the first word of filter, then
@ writes zero to the four words after it.
@
@ In:     r0 = alpha_q16
@ Temps:  r1 = 0, r2 = write pointer into filter
.global mix_reset @ int alpha_q16
mix_reset:
    rsb     r0, r0, #0x10000            @ 0x10000 - alpha_q16
    mov     r1, #0
    ldr     r2, =filter
    mov     r0, r0, asr #4
    str     r0, [r2], #4                @ alpha word
    str     r1, [r2], #4                @ clear the four filter memory words
    str     r1, [r2], #4
    str     r1, [r2], #4
    str     r1, [r2], #4
    bx      lr
599
.data
.align 2                                @ filter is accessed with ldr/ldmia/stmia: keep it word aligned
@ Filter state shared by the mix_32_to_16_* routines and mix_reset:
@ one alpha word followed by four filter memory words.
filter:
    .ds     4 @ lowpass alpha, stored by mix_reset as (0x10000 - alpha_q16) >> 4
    .ds     8 @ filter history for left channel
    .ds     8 @ filter history for right channel
2a942f0d 605
cff531af 606@ vim:filetype=armasm