From b17618c006c1a79ea0aa38c5e452a384f7f2bc95 Mon Sep 17 00:00:00 2001
From: notaz
Date: Mon, 1 Aug 2011 14:07:33 +0300
Subject: [PATCH] spu: split out mixing, use NEON for that on ARM

---
 Makefile                    |  3 ++
 plugins/dfsound/arm_utils.s | 98 +++++++++++++++++++++++++++++++++++++
 plugins/dfsound/spu.c       | 73 +++++++++++++++++----------
 3 files changed, 149 insertions(+), 25 deletions(-)
 create mode 100644 plugins/dfsound/arm_utils.s

diff --git a/Makefile b/Makefile
index 335a03ac..11b73877 100644
--- a/Makefile
+++ b/Makefile
@@ -58,6 +58,9 @@ OBJS += plugins/dfsound/dma.o plugins/dfsound/freeze.o \
 	plugins/dfsound/registers.o plugins/dfsound/spu.o
 plugins/dfsound/spu.o: plugins/dfsound/adsr.c plugins/dfsound/reverb.c \
 	plugins/dfsound/xa.c
+ifeq "$(ARCH)" "arm"
+OBJS += plugins/dfsound/arm_utils.o
+endif
 ifeq "$(USE_OSS)" "1"
 plugins/dfsound/%.o: CFLAGS += -DUSEOSS
 OBJS += plugins/dfsound/oss.o
diff --git a/plugins/dfsound/arm_utils.s b/plugins/dfsound/arm_utils.s
new file mode 100644
index 00000000..a278174d
--- /dev/null
+++ b/plugins/dfsound/arm_utils.s
@@ -0,0 +1,98 @@
+/*
+ * (C) Gražvydas "notaz" Ignotas, 2011
+ *
+ * This work is licensed under the terms of any of these licenses
+ * (at your option):
+ *  - GNU GPL, version 2 or later.
+ *  - GNU LGPL, version 2.1 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+
+.text
+.align 2
+
+
+.global mix_chan @ (int start, int count, int lv, int rv); count must be >= 1, one sample is always written
+mix_chan:
+    vmov.32     d6[0], r2
+    vmov.32     d6[1], r3       @ multipliers {lv, rv}; kept out of d8-d15, which a callee must preserve
+    mov         r12, r0
+    movw        r0, #:lower16:ChanBuf
+    movw        r2, #:lower16:SSumLR
+    movt        r0, #:upper16:ChanBuf
+    movt        r2, #:upper16:SSumLR
+    add         r0, r12, lsl #2 @ r0 = &ChanBuf[start]
+    add         r2, r12, lsl #3 @ r2 = &SSumLR[start*2]
+0:
+    vldmia      r0!, {d0-d1}    @ 4 source samples (may read up to 3 past the end)
+    vldmia      r2, {d2-d5}     @ 4 L/R accumulator pairs
+    vmul.s32    d16, d6, d0[0]
+    vmul.s32    d17, d6, d0[1]
+    vmul.s32    d18, d6, d1[0]
+    vmul.s32    d19, d6, d1[1]
+    vsra.s32    q1, q8, #14     @ acc += (sample * vol) >> 14
+    vsra.s32    q2, q9, #14
+    subs        r1, #4
+    blt         mc_finish       @ fewer than 4 samples were left
+    vstmia      r2!, {d2-d5}
+    bgt         0b
+    nop
+    bxeq        lr
+
+mc_finish:                      @ here r1 = remaining - 4, i.e. -3..-1
+    vstmia      r2!, {d2}
+    cmp         r1, #-2         @ at least 2 samples left?
+    vstmiage    r2!, {d3}
+    cmp         r1, #-1         @ 3 samples left?
+    vstmiage    r2!, {d4}
+    bx          lr
+
+
+.global mix_chan_rvb @ (int start, int count, int lv, int rv); count must be >= 1, one sample is always written
+mix_chan_rvb:
+    vmov.32     d6[0], r2
+    vmov.32     d6[1], r3       @ multipliers {lv, rv}; kept out of d8-d15, which a callee must preserve
+    mov         r12, r0
+    movw        r0, #:lower16:ChanBuf
+    movw        r3, #:lower16:sRVBStart
+    movw        r2, #:lower16:SSumLR
+    movt        r0, #:upper16:ChanBuf
+    movt        r3, #:upper16:sRVBStart
+    movt        r2, #:upper16:SSumLR
+    ldr         r3, [r3]        @ sRVBStart holds a pointer, load it
+    add         r0, r12, lsl #2 @ r0 = &ChanBuf[start]
+    add         r2, r12, lsl #3 @ r2 = &SSumLR[start*2]
+    add         r3, r12, lsl #3 @ r3 = &sRVBStart[start*2]
+0:
+    vldmia      r0!, {d0-d1}    @ 4 source samples (may read up to 3 past the end)
+    vldmia      r2, {d2-d5}     @ 4 L/R accumulator pairs
+    vldmia      r3, {d20-d23}   @ 4 L/R reverb pairs
+    vmul.s32    d16, d6, d0[0]
+    vmul.s32    d17, d6, d0[1]
+    vmul.s32    d18, d6, d1[0]
+    vmul.s32    d19, d6, d1[1]
+    vsra.s32    q1, q8, #14     @ acc += (sample * vol) >> 14
+    vsra.s32    q2, q9, #14
+    vsra.s32    q10, q8, #14    @ same contribution goes to the reverb buffer
+    vsra.s32    q11, q9, #14
+    subs        r1, #4
+    blt         mcr_finish      @ fewer than 4 samples were left
+    vstmia      r2!, {d2-d5}
+    vstmia      r3!, {d20-d23}
+    bgt         0b
+    nop
+    bxeq        lr
+
+mcr_finish:                     @ here r1 = remaining - 4, i.e. -3..-1
+    vstmia      r2!, {d2}
+    vstmia      r3!, {d20}
+    cmp         r1, #-2         @ at least 2 samples left?
+    vstmiage    r2!, {d3}
+    vstmiage    r3!, {d21}
+    cmp         r1, #-1         @ 3 samples left?
+    vstmiage    r2!, {d4}
+    vstmiage    r3!, {d22}
+    bx          lr
+
+@ vim:filetype=armasm
diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c
index 791b27eb..36ac018b 100644
--- a/plugins/dfsound/spu.c
+++ b/plugins/dfsound/spu.c
@@ -122,8 +122,8 @@ static const int f[8][2] = { { 0, 0 },
                         { 115, -52 },
                         { 98, -55 },
                         { 122, -60 } };
-int ChanBuf[NSSIZE];
-int SSumLR[NSSIZE*2];
+int ChanBuf[NSSIZE+3];      // +3: the asm mixer always loads 4 samples at a time
+int SSumLR[(NSSIZE+3)*2];   // +3 pairs: same reason, 4 L/R pairs are loaded at a time
 int iFMod[NSSIZE];
 int iCycle = 0;
 short * pS;
@@ -635,6 +635,49 @@ static int do_samples_noise(int ch, int ns, int ns_to)
  return -1;
 }
 
+#ifdef __arm__
+// asm code (arm_utils.s); both routines require count >= 1
+extern void mix_chan(int start, int count, int lv, int rv);
+extern void mix_chan_rvb(int start, int count, int lv, int rv);
+#else
+static void mix_chan(int start, int count, int lv, int rv) // SSumLR += ChanBuf * {lv,rv} >> 14 for count samples from start
+{
+ int *dst = SSumLR + start * 2;
+ const int *src = ChanBuf + start;
+ int l, r;
+
+ while (count-- > 0) // a non-positive count mixes nothing
+ {
+  int sval = *src++;
+
+  l = (sval * lv) >> 14;
+  r = (sval * rv) >> 14;
+  *dst++ += l;
+  *dst++ += r;
+ }
+}
+
+static void mix_chan_rvb(int start, int count, int lv, int rv) // as mix_chan, and adds the same l/r to sRVBStart
+{
+ int *dst = SSumLR + start * 2;
+ int *drvb = sRVBStart + start * 2;
+ const int *src = ChanBuf + start;
+ int l, r;
+
+ while (count-- > 0) // a non-positive count mixes nothing
+ {
+  int sval = *src++;
+
+  l = (sval * lv) >> 14;
+  r = (sval * rv) >> 14;
+  *dst++ += l;
+  *dst++ += r;
+  *drvb++ += l;
+  *drvb++ += r;
+ }
+}
+#endif
+
 ////////////////////////////////////////////////////////////////////////
 // MAIN SPU FUNCTION
 // here is the main job handler... thread, timer or direct func call
@@ -725,30 +768,10 @@ static void *MAINThread(void *arg)
 
         if(s_chan[ch].bFMod==2)                         // fmod freq channel
          memcpy(iFMod, ChanBuf, sizeof(iFMod));
+        else if(s_chan[ch].bRVBActive)
+         mix_chan_rvb(ns_from,ns_to-ns_from,s_chan[ch].iLeftVolume,s_chan[ch].iRightVolume);
         else
-         {
-          int lv=s_chan[ch].iLeftVolume;
-          int rv=s_chan[ch].iRightVolume;
-
-          for(ns=ns_from;ns<ns_to;ns++)
-           {
-            int sval = ChanBuf[ns];
-            int l, r;
-
-            //////////////////////////////////////////////
-            // ok, left/right sound volume (psx volume goes from 0 ... 0x3fff)
-
-            l=(sval*lv)>>14;
-            r=(sval*rv)>>14;
-            SSumLR[ns*2]  +=l;
-            SSumLR[ns*2+1]+=r;
-
-            //////////////////////////////////////////////
-            // now let us store sound data for reverb
-
-            if(s_chan[ch].bRVBActive) StoreREVERB(ch,ns,l,r);
-           }
-         }
+         mix_chan(ns_from,ns_to-ns_from,s_chan[ch].iLeftVolume,s_chan[ch].iRightVolume);
        }
     }
 
-- 
2.39.2