X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=plugins%2Fdfsound%2Farm_utils.S;fp=plugins%2Fdfsound%2Farm_utils.S;h=22e584432fbf93b8681cde9b7190d52af0d52eae;hb=c67af2ac1a8305c7377c7dda844257c5bc1545e3;hp=0000000000000000000000000000000000000000;hpb=61bc6d40b4f6f846a0ae1b73ceecdca893c14df4;p=pcsx_rearmed.git diff --git a/plugins/dfsound/arm_utils.S b/plugins/dfsound/arm_utils.S new file mode 100644 index 00000000..22e58443 --- /dev/null +++ b/plugins/dfsound/arm_utils.S @@ -0,0 +1,164 @@ +/* + * (C) Gražvydas "notaz" Ignotas, 2011 + * + * This work is licensed under the terms of any of these licenses + * (at your option): + * - GNU GPL, version 2 or later. + * - GNU LGPL, version 2.1 or later. + * See the COPYING file in the top-level directory. + */ + + +.text +.align 2 + +.macro load_varadr reg var +#if defined(__ARM_ARCH_7A__) && !defined(__PIC__) + movw \reg, #:lower16:\var + movt \reg, #:upper16:\var +#else + ldr \reg, =\var +#endif +.endm + +#ifdef __ARM_NEON__ + +.global mix_chan @ (int start, int count, int lv, int rv) +mix_chan: + vmov.32 d14[0], r2 + vmov.32 d14[1], r3 @ multipliers + mov r12, r0 + load_varadr r0, ChanBuf + load_varadr r2, SSumLR + add r0, r12, lsl #2 + add r2, r12, lsl #3 +0: + vldmia r0!, {d0-d1} + vldmia r2, {d2-d5} + vmul.s32 d10, d14, d0[0] + vmul.s32 d11, d14, d0[1] + vmul.s32 d12, d14, d1[0] + vmul.s32 d13, d14, d1[1] + vsra.s32 q1, q5, #14 + vsra.s32 q2, q6, #14 + subs r1, #4 + blt mc_finish + vstmia r2!, {d2-d5} + bgt 0b + nop + bxeq lr + +mc_finish: + vstmia r2!, {d2} + cmp r1, #-2 + vstmiage r2!, {d3} + cmp r1, #-1 + vstmiage r2!, {d4} + bx lr + + +.global mix_chan_rvb @ (int start, int count, int lv, int rv) +mix_chan_rvb: + vmov.32 d14[0], r2 + vmov.32 d14[1], r3 @ multipliers + mov r12, r0 + load_varadr r0, ChanBuf + load_varadr r3, sRVBStart + load_varadr r2, SSumLR + ldr r3, [r3] + add r0, r12, lsl #2 + add r2, r12, lsl #3 + add r3, r12, lsl #3 +0: + vldmia r0!, {d0-d1} + vldmia r2, {d2-d5} + vldmia r3, {d6-d9} + vmul.s32 d10, d14, d0[0] + vmul.s32 d11, d14, d0[1] + vmul.s32 d12, d14, d1[0] + vmul.s32 d13, d14, d1[1] + vsra.s32 q1, q5, #14 + vsra.s32 q2, q6, #14 + vsra.s32 q3, q5, #14 + vsra.s32 q4, q6, #14 + subs r1, #4 + blt mcr_finish + vstmia r2!, {d2-d5} + vstmia r3!, {d6-d9} + bgt 0b + nop + bxeq lr + +mcr_finish: + vstmia r2!, {d2} + vstmia r3!, {d6} + cmp r1, #-2 + vstmiage r2!, {d3} + vstmiage r3!, {d7} + cmp r1, #-1 + vstmiage r2!, {d4} + vstmiage r3!, {d8} + bx lr + +#else + +.global mix_chan @ (int start, int count, int lv, int rv) +mix_chan: + stmfd sp!, {r4-r8,lr} + orr r3, r2, r3, lsl #16 + lsl r3, #1 @ packed multipliers << 1 + mov r12, r0 + load_varadr r0, ChanBuf + load_varadr r2, SSumLR + add r0, r12, lsl #2 + add r2, r12, lsl #3 +0: + ldmia r0!, {r4,r5} + ldmia r2, {r6-r8,lr} + lsl r4, #1 @ adjust for mul + lsl r5, #1 + smlawb r6, r4, r3, r6 + smlawt r7, r4, r3, r7 + smlawb r8, r5, r3, r8 + smlawt lr, r5, r3, lr + subs r1, #2 + blt mc_finish + stmia r2!, {r6-r8,lr} + bgt 0b + ldmeqfd sp!, {r4-r8,pc} + +mc_finish: + stmia r2!, {r6,r7} + ldmfd sp!, {r4-r8,pc} + + +.global mix_chan_rvb @ (int start, int count, int lv, int rv) +mix_chan_rvb: + stmfd sp!, {r4-r8,lr} + orr lr, r2, r3, lsl #16 + lsl lr, #1 + load_varadr r3, sRVBStart + load_varadr r2, SSumLR + load_varadr r4, ChanBuf + ldr r3, [r3] + add r2, r2, r0, lsl #3 + add r3, r3, r0, lsl #3 + add r0, r4, r0, lsl #2 +0: + ldr r4, [r0], #4 + ldmia r2, {r6,r7} + ldmia r3, {r8,r12} + lsl r4, #1 + smlawb r6, r4, lr, r6 @ supposedly takes single cycle? + smlawt r7, r4, lr, r7 + smlawb r8, r4, lr, r8 + smlawt r12,r4, lr, r12 + subs r1, #1 + stmia r2!, {r6,r7} + stmia r3!, {r8,r12} + bgt 0b + ldmfd sp!, {r4-r8,pc} + +#endif + +@ vim:filetype=armasm