+++ /dev/null
-/*
- * (C) GraÅžvydas "notaz" Ignotas, 2011
- *
- * This work is licensed under the terms of any of these licenses
- * (at your option):
- * - GNU GPL, version 2 or later.
- * - GNU LGPL, version 2.1 or later.
- * See the COPYING file in the top-level directory.
- */
-
-
-.text
-.align 2
-
-@ XXX: should be HAVE_NEON
-.if HAVE_ARMV7
-
-.global mix_chan @ (int start, int count, int lv, int rv)
-mix_chan:
- vmov.32 d14[0], r2
- vmov.32 d14[1], r3 @ multipliers
- mov r12, r0
- movw r0, #:lower16:ChanBuf
- movw r2, #:lower16:SSumLR
- movt r0, #:upper16:ChanBuf
- movt r2, #:upper16:SSumLR
- add r0, r12, lsl #2
- add r2, r12, lsl #3
-0:
- vldmia r0!, {d0-d1}
- vldmia r2, {d2-d5}
- vmul.s32 d10, d14, d0[0]
- vmul.s32 d11, d14, d0[1]
- vmul.s32 d12, d14, d1[0]
- vmul.s32 d13, d14, d1[1]
- vsra.s32 q1, q5, #14
- vsra.s32 q2, q6, #14
- subs r1, #4
- blt mc_finish
- vstmia r2!, {d2-d5}
- bgt 0b
- nop
- bxeq lr
-
-mc_finish:
- vstmia r2!, {d2}
- cmp r1, #-2
- vstmiage r2!, {d3}
- cmp r1, #-1
- vstmiage r2!, {d4}
- bx lr
-
-
-.global mix_chan_rvb @ (int start, int count, int lv, int rv)
-mix_chan_rvb:
- vmov.32 d14[0], r2
- vmov.32 d14[1], r3 @ multipliers
- mov r12, r0
- movw r0, #:lower16:ChanBuf
- movw r3, #:lower16:sRVBStart
- movw r2, #:lower16:SSumLR
- movt r0, #:upper16:ChanBuf
- movt r3, #:upper16:sRVBStart
- movt r2, #:upper16:SSumLR
- ldr r3, [r3]
- add r0, r12, lsl #2
- add r2, r12, lsl #3
- add r3, r12, lsl #3
-0:
- vldmia r0!, {d0-d1}
- vldmia r2, {d2-d5}
- vldmia r3, {d6-d9}
- vmul.s32 d10, d14, d0[0]
- vmul.s32 d11, d14, d0[1]
- vmul.s32 d12, d14, d1[0]
- vmul.s32 d13, d14, d1[1]
- vsra.s32 q1, q5, #14
- vsra.s32 q2, q6, #14
- vsra.s32 q3, q5, #14
- vsra.s32 q4, q6, #14
- subs r1, #4
- blt mcr_finish
- vstmia r2!, {d2-d5}
- vstmia r3!, {d6-d9}
- bgt 0b
- nop
- bxeq lr
-
-mcr_finish:
- vstmia r2!, {d2}
- vstmia r3!, {d6}
- cmp r1, #-2
- vstmiage r2!, {d3}
- vstmiage r3!, {d7}
- cmp r1, #-1
- vstmiage r2!, {d4}
- vstmiage r3!, {d8}
- bx lr
-
-.else
-
-.global mix_chan @ (int start, int count, int lv, int rv)
-mix_chan:
- stmfd sp!, {r4-r8,lr}
- orr r3, r2, r3, lsl #16
- lsl r3, #1 @ packed multipliers << 1
- mov r12, r0
- ldr r0, =ChanBuf
- ldr r2, =SSumLR
- add r0, r12, lsl #2
- add r2, r12, lsl #3
-0:
- ldmia r0!, {r4,r5}
- ldmia r2, {r6-r8,lr}
- lsl r4, #1 @ adjust for mul
- lsl r5, #1
- smlawb r6, r4, r3, r6
- smlawt r7, r4, r3, r7
- smlawb r8, r5, r3, r8
- smlawt lr, r5, r3, lr
- subs r1, #2
- blt mc_finish
- stmia r2!, {r6-r8,lr}
- bgt 0b
- ldmeqfd sp!, {r4-r8,pc}
-
-mc_finish:
- stmia r2!, {r6,r7}
- ldmfd sp!, {r4-r8,pc}
-
-
-.global mix_chan_rvb @ (int start, int count, int lv, int rv)
-mix_chan_rvb:
- stmfd sp!, {r4-r8,lr}
- orr lr, r2, r3, lsl #16
- lsl lr, #1
- ldr r3, =sRVBStart
- ldr r2, =SSumLR
- ldr r4, =ChanBuf
- ldr r3, [r3]
- add r2, r2, r0, lsl #3
- add r3, r3, r0, lsl #3
- add r0, r4, r0, lsl #2
-0:
- ldr r4, [r0], #4
- ldmia r2, {r6,r7}
- ldmia r3, {r8,r12}
- lsl r4, #1
- smlawb r6, r4, lr, r6 @ supposedly takes single cycle?
- smlawt r7, r4, lr, r7
- smlawb r8, r4, lr, r8
- smlawt r12,r4, lr, r12
- subs r1, #1
- stmia r2!, {r6,r7}
- stmia r3!, {r8,r12}
- bgt 0b
- ldmfd sp!, {r4-r8,pc}
-
-.endif
-
-@ vim:filetype=armasm