spu: split out mixing, use NEON for that on ARM
author notaz <notasas@gmail.com>
Mon, 1 Aug 2011 11:07:33 +0000 (14:07 +0300)
committer notaz <notasas@gmail.com>
Mon, 1 Aug 2011 20:32:15 +0000 (23:32 +0300)
Makefile
plugins/dfsound/arm_utils.s [new file with mode: 0644]
plugins/dfsound/spu.c

index 335a03a..11b7387 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -58,6 +58,9 @@ OBJS += plugins/dfsound/dma.o plugins/dfsound/freeze.o \
        plugins/dfsound/registers.o plugins/dfsound/spu.o
 plugins/dfsound/spu.o: plugins/dfsound/adsr.c plugins/dfsound/reverb.c \
        plugins/dfsound/xa.c
+# ARM builds also link the assembly mixing routines (mix_chan, mix_chan_rvb).
+# NOTE(review): arm_utils.s uses NEON instructions, yet only ARCH is checked
+# here - confirm that every supported ARM target actually has NEON.
+ifeq "$(ARCH)" "arm"
+OBJS += plugins/dfsound/arm_utils.o
+endif
 ifeq "$(USE_OSS)" "1"
 plugins/dfsound/%.o: CFLAGS += -DUSEOSS
 OBJS += plugins/dfsound/oss.o
diff --git a/plugins/dfsound/arm_utils.s b/plugins/dfsound/arm_utils.s
new file mode 100644 (file)
index 0000000..a278174
--- /dev/null
@@ -0,0 +1,98 @@
+/*
+ * (C) Gražvydas "notaz" Ignotas, 2011
+ *
+ * This work is licensed under the terms of any of these licenses
+ * (at your option):
+ *  - GNU GPL, version 2 or later.
+ *  - GNU LGPL, version 2.1 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+
+.text
+.align 2
+
+
+@ mix_chan: add one channel's samples into the stereo mix.
+@
+@ For each of the count samples starting at ChanBuf[start], the products
+@ (sample * lv) >> 14 and (sample * rv) >> 14 are added to the matching
+@ left/right pair of SSumLR, starting at SSumLR[start*2].
+@
+@   r0 = start   index of the first sample
+@   r1 = count   number of samples to mix; expected to be >= 1
+@                (with count == 0 the first pair is still updated)
+@   r2 = lv      left multiplier
+@   r3 = rv      right multiplier
+@
+@ Four samples are always loaded and computed per iteration, so when count
+@ is not a multiple of 4 up to 3 entries past the requested range of
+@ ChanBuf and SSumLR are read; only the requested pairs are written back.
+@
+@ NOTE(review): d10-d14 are overwritten without being saved, while the
+@ AAPCS lists d8-d15 as callee-saved - confirm no caller relies on them.
+.global mix_chan @ (int start, int count, int lv, int rv)
+mix_chan:
+    vmov.32     d14[0], r2
+    vmov.32     d14[1], r3             @ multipliers
+    mov         r12, r0
+    movw        r0, #:lower16:ChanBuf
+    movw        r2, #:lower16:SSumLR
+    movt        r0, #:upper16:ChanBuf
+    movt        r2, #:upper16:SSumLR
+    add         r0, r12, lsl #2        @ r0 = &ChanBuf[start]
+    add         r2, r12, lsl #3        @ r2 = &SSumLR[start*2]
+0:
+    vldmia      r0!, {d0-d1}           @ 4 samples
+    vldmia      r2, {d2-d5}            @ 4 left/right sums
+    vmul.s32    d10, d14, d0[0]        @ {lv, rv} * sample, once per sample
+    vmul.s32    d11, d14, d0[1]
+    vmul.s32    d12, d14, d1[0]
+    vmul.s32    d13, d14, d1[1]
+    vsra.s32    q1, q5, #14            @ sums += products >> 14
+    vsra.s32    q2, q6, #14
+    subs        r1, #4
+    blt         mc_finish              @ fewer than 4 samples were left
+    vstmia      r2!, {d2-d5}
+    bgt         0b                     @ flags are still those of the subs
+    nop
+    bxeq        lr
+
+mc_finish:
+    @ r1 = (samples that were left) - 4, i.e. -3, -2 or -1 for 1, 2 or 3.
+    @ The comparisons must therefore be against negative values; comparing
+    @ with #2 / #3 would never store the 2nd and 3rd remaining pairs.
+    vstmia      r2!, {d2}              @ 1st remaining pair
+    cmp         r1, #-2
+    vstmiage    r2!, {d3}              @ 2nd pair when 2 or 3 were left
+    cmp         r1, #-1
+    vstmiage    r2!, {d4}              @ 3rd pair when 3 were left
+    bx          lr
+
+
+@ mix_chan_rvb: like mix_chan, but also feeds the reverb buffer.
+@
+@ For each of the count samples starting at ChanBuf[start], the products
+@ (sample * lv) >> 14 and (sample * rv) >> 14 are added to the matching
+@ left/right pair of SSumLR (from SSumLR[start*2]) and to the pair at the
+@ same offset in the buffer that the pointer variable sRVBStart points to.
+@
+@   r0 = start   index of the first sample
+@   r1 = count   number of samples to mix; expected to be >= 1
+@                (with count == 0 the first pair is still updated)
+@   r2 = lv      left multiplier
+@   r3 = rv      right multiplier
+@
+@ Four samples are always loaded and computed per iteration, so when count
+@ is not a multiple of 4 up to 3 entries past the requested range of
+@ ChanBuf, SSumLR and the reverb buffer are read; only the requested
+@ pairs are written back.
+@
+@ NOTE(review): d8-d14 are overwritten without being saved, while the
+@ AAPCS lists d8-d15 as callee-saved - confirm no caller relies on them.
+.global mix_chan_rvb @ (int start, int count, int lv, int rv)
+mix_chan_rvb:
+    vmov.32     d14[0], r2
+    vmov.32     d14[1], r3             @ multipliers
+    mov         r12, r0
+    movw        r0, #:lower16:ChanBuf
+    movw        r3, #:lower16:sRVBStart
+    movw        r2, #:lower16:SSumLR
+    movt        r0, #:upper16:ChanBuf
+    movt        r3, #:upper16:sRVBStart
+    movt        r2, #:upper16:SSumLR
+    ldr         r3, [r3]               @ sRVBStart holds a pointer; fetch it
+    add         r0, r12, lsl #2        @ r0 = &ChanBuf[start]
+    add         r2, r12, lsl #3        @ r2 = &SSumLR[start*2]
+    add         r3, r12, lsl #3        @ r3 = reverb buffer + start*2 ints
+0:
+    vldmia      r0!, {d0-d1}           @ 4 samples
+    vldmia      r2, {d2-d5}            @ 4 left/right sums
+    vldmia      r3, {d6-d9}            @ 4 left/right reverb sums
+    vmul.s32    d10, d14, d0[0]        @ {lv, rv} * sample, once per sample
+    vmul.s32    d11, d14, d0[1]
+    vmul.s32    d12, d14, d1[0]
+    vmul.s32    d13, d14, d1[1]
+    vsra.s32    q1, q5, #14            @ sums += products >> 14
+    vsra.s32    q2, q6, #14
+    vsra.s32    q3, q5, #14            @ reverb sums += products >> 14
+    vsra.s32    q4, q6, #14
+    subs        r1, #4
+    blt         mcr_finish             @ fewer than 4 samples were left
+    vstmia      r2!, {d2-d5}
+    vstmia      r3!, {d6-d9}
+    bgt         0b                     @ flags are still those of the subs
+    nop
+    bxeq        lr
+
+mcr_finish:
+    @ r1 = (samples that were left) - 4, i.e. -3, -2 or -1 for 1, 2 or 3.
+    @ The comparisons must therefore be against negative values; comparing
+    @ with #2 / #3 would never store the 2nd and 3rd remaining pairs.
+    vstmia      r2!, {d2}              @ 1st remaining pair
+    vstmia      r3!, {d6}
+    cmp         r1, #-2
+    vstmiage    r2!, {d3}              @ 2nd pair when 2 or 3 were left
+    vstmiage    r3!, {d7}
+    cmp         r1, #-1
+    vstmiage    r2!, {d4}              @ 3rd pair when 3 were left
+    vstmiage    r3!, {d8}
+    bx          lr
+
+@ vim:filetype=armasm
index 791b27e..36ac018 100644 (file)
@@ -122,8 +122,8 @@ static const int f[8][2] = {   {    0,  0  },
                         {  115, -52 },
                         {   98, -55 },
                         {  122, -60 } };
-int ChanBuf[NSSIZE];
-int SSumLR[NSSIZE*2];
+// NOTE(review): the 3 extra samples are presumably padding for the ARM NEON
+// mixer in arm_utils.s, which always loads 4 samples per iteration and so can
+// read up to 3 entries past the last requested one - confirm.
+int ChanBuf[NSSIZE+3];
+int SSumLR[(NSSIZE+3)*2];
 int iFMod[NSSIZE];
 int iCycle = 0;
 short * pS;
@@ -635,6 +635,49 @@ static int do_samples_noise(int ch, int ns, int ns_to)
  return -1;
 }
 
+#ifdef __arm__
+// ARM builds take both mixers from the assembly file arm_utils.s;
+// every other target uses the C versions in the #else branch below.
+extern void mix_chan(int start, int count, int lv, int rv);
+extern void mix_chan_rvb(int start, int count, int lv, int rv);
+#else
+// Mixes count samples, beginning at ChanBuf[start], into the interleaved
+// left/right accumulator SSumLR. Each sample is multiplied by lv (left)
+// and rv (right) and shifted right by 14 before being added.
+static void mix_chan(int start, int count, int lv, int rv)
+{
+ const int *in = &ChanBuf[start];
+ int *out = &SSumLR[start * 2];
+
+ for (; count != 0; --count, ++in, out += 2)
+  {
+   const int sample = *in;
+
+   out[0] += (sample * lv) >> 14;   // left
+   out[1] += (sample * rv) >> 14;   // right
+  }
+}
+
+// Same as mix_chan, but every scaled left/right value is additionally
+// accumulated into the reverb buffer at sRVBStart, using the same
+// start*2 offset as for SSumLR.
+static void mix_chan_rvb(int start, int count, int lv, int rv)
+{
+ const int *in = &ChanBuf[start];
+ int *out = &SSumLR[start * 2];
+ int *rvb = sRVBStart + start * 2;
+
+ for (; count != 0; --count, ++in, out += 2, rvb += 2)
+  {
+   const int sample = *in;
+   const int left = (sample * lv) >> 14;
+   const int right = (sample * rv) >> 14;
+
+   // main mix first, then the reverb input
+   out[0] += left;
+   out[1] += right;
+   rvb[0] += left;
+   rvb[1] += right;
+  }
+}
+#endif
+
 ////////////////////////////////////////////////////////////////////////
 // MAIN SPU FUNCTION
 // here is the main job handler... thread, timer or direct func call
@@ -725,30 +768,10 @@ static void *MAINThread(void *arg)
 
        if(s_chan[ch].bFMod==2)                         // fmod freq channel
         memcpy(iFMod, ChanBuf, sizeof(iFMod));
+       else if(s_chan[ch].bRVBActive)
+        mix_chan_rvb(ns_from,ns_to-ns_from,s_chan[ch].iLeftVolume,s_chan[ch].iRightVolume);
        else
-        {
-         int lv=s_chan[ch].iLeftVolume;
-         int rv=s_chan[ch].iRightVolume;
-
-         for(ns=ns_from;ns<ns_to;ns++)
-          {
-           int sval = ChanBuf[ns];
-           int l, r;
-
-           //////////////////////////////////////////////
-           // ok, left/right sound volume (psx volume goes from 0 ... 0x3fff)
-
-           l=(sval*lv)>>14;
-           r=(sval*rv)>>14;
-           SSumLR[ns*2]  +=l;
-           SSumLR[ns*2+1]+=r;
-
-           //////////////////////////////////////////////
-           // now let us store sound data for reverb    
-
-           if(s_chan[ch].bRVBActive) StoreREVERB(ch,ns,l,r);
-          }
-        }
+        mix_chan(ns_from,ns_to-ns_from,s_chan[ch].iLeftVolume,s_chan[ch].iRightVolume);
       }
     }