sound, improved and optimized reimplementation of libretro lowpass filter

author kub <derkub@gmail.com>

Mon, 21 Dec 2020 22:22:00 +0000 (23:22 +0100)

committer kub <derkub@gmail.com>

Wed, 23 Dec 2020 14:51:49 +0000 (15:51 +0100)
author kub <derkub@gmail.com>
Mon, 21 Dec 2020 22:22:00 +0000 (23:22 +0100)
committer kub <derkub@gmail.com>
Wed, 23 Dec 2020 14:51:49 +0000 (15:51 +0100)
diff --git a/pico/sound/mix.c b/pico/sound/mix.c

index 4b4bbdd..58e9c8c 100644 (file)
--- a/pico/sound/mix.c
+++ b/pico/sound/mix.c
@@ -1,6 +1,7 @@
  /*
   * some code for sample mixing
   * (C) notaz, 2006,2007
+ * (C) kub, 2019,2020          added filtering
   *
   * This work is licensed under the terms of MAME license.
   * See COPYING file in the top-level directory.
@@ -13,78 +14,97 @@
  
  /* limitter */
  #define Limit16(val) \
+       val -= val >> 2; /* reduce level to avoid clipping */   \
         if ((short)val != val) val = (val < 0 ? MINOUT : MAXOUT)
  
  int mix_32_to_16l_level;
  
-static struct iir2 { // 2-pole IIR
-       int     x[2];           // sample buffer
+static struct iir {
+       int     alpha;          // alpha for EMA low pass
         int     y[2];           // filter intermediates
-       int     i;
  } lfi2, rfi2;
  
  // NB ">>" rounds to -infinity, "/" to 0. To compensate the effect possibly use
  // "-(-y>>n)" (round to +infinity) instead of "y>>n" in places.
  
-// NB uses Q12 fixpoint; samples mustn't have more than 20 bits for this.
+// NB uses fixpoint; samples mustn't have more than (32-QB) bits. Adding the
+// outputs of the sound sources together yields a max. of 18 bits, restricting
+// QB to a maximum of 14.
  #define QB     12
+// NB alpha for DC filtering shouldn't be smaller than 1/(1<<QB) to avoid loss.
  
  
+// exponential moving average combined DC filter and lowpass filter
+// y0[n] = (x[n]-y0[n-1])*alpha+y0[n-1], y1[n] = (y0[n] - y1[n-1])*(1-1/4096)
+static inline int filter_band(struct iir *fi2, int x)
+{
+       // low pass. alpha is Q12 here; the disabled variant below uses a Q8 alpha to avoid loss by 32 bit overflow.
+//     fi2->y[0] += ((x<<(QB-8)) - (fi2->y[0]>>8)) * fi2->alpha;
+       fi2->y[0] += (x - (fi2->y[0]>>QB)) * fi2->alpha;
+       // DC filter. for alpha=1-1/8192 cutoff ~1HZ, for 1-1/1024 ~7Hz
+       fi2->y[1] += (fi2->y[0] - fi2->y[1]) >> QB;
+       return (fi2->y[0] - fi2->y[1]) >> QB;
+}
+
  // exponential moving average filter for DC filtering
-// y[n] = (x[n]-y[n-1])*(1/8192) (corner approx. 20Hz, gain 1)
-static inline int filter_exp(struct iir2 *fi2, int x)
+// y[n] = (x[n]-y[n-1])*(1-1/4096) (corner approx. 1Hz, gain 1)
+static inline int filter_exp(struct iir *fi2, int x)
  {
-       int xf = (x<<QB) - fi2->y[0];
-       fi2->y[0] += xf >> 13;
-       xf -= xf >> 2;  // level reduction to avoid clipping from overshoot
-       return xf>>QB;
+       fi2->y[1] += ((x << QB) - fi2->y[1]) >> QB;
+       return x - (fi2->y[1] >> QB);
  }
  
  // unfiltered (for testing)
-static inline int filter_null(struct iir2 *fi2, int x)
+static inline int filter_null(struct iir *fi2, int x)
  {
         return x;
  }
  
+#define filter filter_band
+
  #define mix_32_to_16l_stereo_core(dest, src, count, lv, fl) {  \
         int l, r;                                               \
+       struct iir lf = lfi2, rf = rfi2;                        \
                                                                 \
         for (; count > 0; count--)                              \
         {                                                       \
                 l = r = *dest;                                  \
                 l += *src++ >> lv;                              \
                 r += *src++ >> lv;                              \
-               l = fl(&lfi2, l);                               \
-               r = fl(&rfi2, r);                               \
+               l = fl(&lf, l);                                 \
+               r = fl(&rf, r);                                 \
                 Limit16(l);                                     \
                 Limit16(r);                                     \
                 *dest++ = l;                                    \
                 *dest++ = r;                                    \
         }                                                       \
+       lfi2 = lf, rfi2 = rf;                                   \
  }
  
  void mix_32_to_16l_stereo_lvl(short *dest, int *src, int count)
  {
-       mix_32_to_16l_stereo_core(dest, src, count, mix_32_to_16l_level, filter_exp);
+       mix_32_to_16l_stereo_core(dest, src, count, mix_32_to_16l_level, filter);
  }
  
  void mix_32_to_16l_stereo(short *dest, int *src, int count)
  {
-       mix_32_to_16l_stereo_core(dest, src, count, 0, filter_exp);
+       mix_32_to_16l_stereo_core(dest, src, count, 0, filter);
  }
  
  void mix_32_to_16_mono(short *dest, int *src, int count)
  {
         int l;
+       struct iir lf = lfi2;
  
         for (; count > 0; count--)
         {
                 l = *dest;
                 l += *src++;
-               l = filter_exp(&lfi2, l);
+               l = filter(&lf, l);
                 Limit16(l);
                 *dest++ = l;
         }
+       lfi2 = lf;
  }
  
  
@@ -118,8 +138,9 @@ void mix_16h_to_32_s2(int *dest_buf, short *mp3_buf, int count)
         }
  }
  
-void mix_reset(void)
+void mix_reset(int alpha_q16)
  {
         memset(&lfi2, 0, sizeof(lfi2));
         memset(&rfi2, 0, sizeof(rfi2));
+       lfi2.alpha = rfi2.alpha = (0x10000-alpha_q16) >> 4; // filter alpha, Q12
  }
diff --git a/pico/sound/mix.h b/pico/sound/mix.h

index e128bad..a0dfcac 100644 (file)
--- a/pico/sound/mix.h
+++ b/pico/sound/mix.h
@@ -8,4 +8,4 @@ void mix_32_to_16_mono(short *dest, int *src, int count);
  
  extern int mix_32_to_16l_level;
  void mix_32_to_16l_stereo_lvl(short *dest, int *src, int count);
-void mix_reset(void);
+void mix_reset(int alpha_q16);
diff --git a/pico/sound/mix_arm.S b/pico/sound/mix_arm.S

index a1558d7..6043898 100644 (file)
--- a/pico/sound/mix_arm.S
+++ b/pico/sound/mix_arm.S
@@ -154,34 +154,46 @@ m16_32_s2_no_unal2:
  
  
  @ limit
-@ reg=int_sample, lr=1, r3=tmp, kills flags
+@ reg=int_sample, r12=1, r8=tmp, kills flags
  .macro Limit reg
-    add     r3, lr, \reg, asr #15
-    bics    r3, r3, #1                 @ in non-overflow conditions r3 is 0 or 1
+    sub     \reg, \reg, \reg, asr #2   @ reduce audio lvl some to avoid clipping
+    add     r8, r12, \reg, asr #15
+    bics    r8, r8, #1                 @ in non-overflow conditions r8 is 0 or 1
      movne   \reg, #0x8000
      subpl   \reg, \reg, #1
  .endm
  
-
  @ limit and shift up by 16
-@ reg=int_sample, lr=1, r3=tmp, kills flags
+@ reg=int_sample, r12=1, r8=tmp, kills flags
  .macro Limitsh reg
-    add     r3, lr, \reg, asr #15
-    bics    r3, r3, #1                 @ in non-overflow conditions r3 is 0 or 1
+    sub     \reg, \reg, \reg, asr #2   @ reduce audio lvl some to avoid clipping
+    add     r8, r12,\reg, asr #15
+    bics    r8, r8, #1                 @ in non-overflow conditions r8 is 0 or 1
      moveq   \reg, \reg, lsl #16
      movne   \reg, #0x80000000
      subpl   \reg, \reg, #0x00010000
  .endm
  
+
  @ filter out DC offset
-@ in=int_sample (max 20 bit), y=filter memory, r3=tmp
+@ in=int_sample (max 20 bit), y=filter memory, r8=tmp
  .macro DCfilt in y
-    rsb     r3, \y, \in, lsl #12               @ fixpoint 20.12
-    add     \y, \y, r3, asr #13
-    sub     r3, r3, r3, asr #2                 @ reduce audio lvl some
-    asr     \in, r3, #12
+    rsb     r8, \y, \in, lsl #12       @ fixpoint 20.12
+    add     \y, \y, r8, asr #12                @ alpha = 1-1/4096
+    sub     \in, \in, \y, asr #12
+.endm
+
+@ lowpass filter
+@ in=int_sample (max 20 bit), y=filter memory, r12=alpha(Q12), r8=tmp
+.macro LPfilt in y
+@    asr     r8, \y, #8
+@    rsb     r8, r8, \in, lsl #4               @ fixpoint 20.12
+    sub     r8, \in, \y, asr #12               @ fixpoint 20.12
+    mla     \y, r8, r12, \y
+    asr     \in, \y, #12
  .endm
  
+
  @ mix 32bit audio (with 16bits really used, upper bits indicate overflow) with normal 16 bit audio with left channel only
  @ warning: this function assumes dest is word aligned
  .global mix_32_to_16l_stereo @ short *dest, int *src, int count
@@ -193,9 +205,10 @@ mix_32_to_16l_stereo:
      subs    r2, r2, #4
      bmi     m32_16l_st_end
  
-    mov     lr, #1
      ldr     r12, =filter
-    ldmia   r12, {r10-r11}
+    ldr     r8, [r12], #4
+    ldmia   r12, {r3,r10-r11,lr}
+    str     r8, [sp, #-4]!
  
  m32_16l_st_loop:
      ldmia   r0,  {r8,r12}
@@ -206,10 +219,16 @@ m32_16l_st_loop:
      add     r5, r5, r8, asr #16
      add     r6, r6, r12,asr #16
      add     r7, r7, r12,asr #16
+    ldr     r12,[sp]
+    LPfilt  r4, r3
+    LPfilt  r5, lr
+    LPfilt  r6, r3
+    LPfilt  r7, lr
      DCfilt  r4, r10
      DCfilt  r5, r11
      DCfilt  r6, r10
      DCfilt  r7, r11
+    mov     r12,#1
      Limitsh r4
      Limitsh r5
      Limitsh r6
@@ -228,8 +247,12 @@ m32_16l_st_end:
      ldmia   r1!,{r4,r5}
      add     r4, r4, r6
      add     r5, r5, r6
+    ldr     r12,[sp]
+    LPfilt  r4, r3
+    LPfilt  r5, lr
      DCfilt  r4, r10
      DCfilt  r5, r11
+    mov     r12,#1
      Limitsh r4
      Limitsh r5
      orr     r4, r5, r4, lsr #16
@@ -237,7 +260,9 @@ m32_16l_st_end:
  
  m32_16l_st_no_unal2:
      ldr     r12, =filter
-    stmia   r12, {r10-r11}
+    add     r12,r12, #4
+    stmia   r12, {r3,r10-r11,lr}
+    add     sp, sp, #4
      ldmfd   sp!, {r4-r8,r10-r11,lr}
      bx      lr
  
@@ -248,9 +273,10 @@ m32_16l_st_no_unal2:
  mix_32_to_16_mono:
      stmfd   sp!, {r4-r8,r10-r11,lr}
  
-    mov     lr, #1
      ldr     r12, =filter
-    ldr     r10, [r12]
+    ldr     r8, [r12], #4
+    ldmia   r12, {r10-r11}
+    str     r8, [sp, #-4]!
  
      @ check if dest is word aligned
      tst     r0, #2
@@ -259,6 +285,10 @@ mix_32_to_16_mono:
      ldr     r4, [r1], #4
      sub     r2, r2, #1
      add     r4, r4, r5
+    ldr     r12,[sp]
+    LPfilt  r4, r11
+    DCfilt  r4, r10
+    mov     r12,#1
      Limit   r4
      strh    r4, [r0], #2
  
@@ -275,10 +305,16 @@ m32_16_mo_loop:
      add     r7, r7, r12,asr #16
      mov     r12,r12,lsl #16
      add     r6, r6, r12,asr #16
+    ldr     r12,[sp]
+    LPfilt  r4, r11
+    LPfilt  r5, r11
+    LPfilt  r6, r11
+    LPfilt  r7, r11
      DCfilt  r4, r10
      DCfilt  r5, r10
      DCfilt  r6, r10
      DCfilt  r7, r10
+    mov     r12,#1
      Limitsh r4
      Limitsh r5
      Limitsh r6
@@ -298,8 +334,12 @@ m32_16_mo_end:
      add     r5, r5, r6, asr #16
      mov     r6, r6, lsl #16
      add     r4, r4, r6, asr #16
+    ldr     r12,[sp]
+    LPfilt  r4, r11
+    LPfilt  r5, r11
      DCfilt  r4, r10
      DCfilt  r5, r10
+    mov     r12,#1
      Limitsh r4
      Limitsh r5
      orr     r4, r5, r4, lsr #16
@@ -311,13 +351,18 @@ m32_16_mo_no_unal2:
      ldrsh   r5, [r0]
      ldr     r4, [r1], #4
      add     r4, r4, r5
+    ldr     r12,[sp]
+    LPfilt  r4, r11
      DCfilt  r4, r10
+    mov     r12,#1
      Limit   r4
      strh    r4, [r0], #2
  
  m32_16_mo_no_unal:
      ldr     r12, =filter
-    str     r10, [r12]
+    add     r12,r12, #4
+    stmia   r12, {r10-r11}
+    add     sp, sp, #4
      ldmfd   sp!, {r4-r8,r10-r11,lr}
      bx      lr
  
@@ -344,7 +389,9 @@ mix_32_to_16l_stereo_lvl:
      mov     lr, #1
      ldr     r9, [r9]
      ldr     r12, =filter
-    ldm     r12, {r10-r11}
+    ldr     r8, [r12], #4
+    ldmia   r12, {r3,r10-r11,lr}
+    str     r8, [sp, #-4]!
  
      mov     r2, r2, lsl #1
      subs    r2, r2, #4
@@ -363,10 +410,16 @@ m32_16l_st_l_loop:
      mov     r5, r5, asr r9
      mov     r6, r6, asr r9
      mov     r7, r7, asr r9
+    ldr     r12,[sp]
+    LPfilt  r4, r3
+    LPfilt  r5, lr
+    LPfilt  r6, r3
+    LPfilt  r7, lr
      DCfilt  r4, r10
      DCfilt  r5, r11
      DCfilt  r6, r10
      DCfilt  r7, r11
+    mov     r12,#1
      Limitsh r4
      Limitsh r5
      Limitsh r6
@@ -387,8 +440,12 @@ m32_16l_st_l_end:
      add     r5, r5, r6
      mov     r4, r4, asr r9
      mov     r5, r5, asr r9
+    ldr     r12,[sp]
+    LPfilt  r4, r3
+    LPfilt  r5, lr
      DCfilt  r4, r10
      DCfilt  r5, r11
+    mov     r12,#1
      Limitsh r4
      Limitsh r5
      orr     r4, r5, r4, lsr #16
@@ -396,22 +453,32 @@ m32_16l_st_l_end:
  
  m32_16l_st_l_no_unal2:
      ldr     r12, =filter
-    stmia   r12, {r10-r11}
+    add     r12,r12, #4
+    stmia   r12, {r3,r10-r11,lr}
+    add     sp, sp, #4
      ldmfd   sp!, {r4-r11,lr}
      bx      lr
  
  #endif /* __GP2X__ */
  
-.global mix_reset @ void
+.global mix_reset @ int alpha_q16
  mix_reset:
-    ldr     r0, =filter
+    ldr     r2, =filter
+    rsb     r0, r0, #0x10000
+@    asr     r0, r0, #8
+    asr     r0, r0, #4
+    str     r0, [r2], #4
      mov     r1, #0
-    str     r1, [r0]
-    str     r1, [r0, #4]
+    str     r1, [r2], #4
+    str     r1, [r2], #4
+    str     r1, [r2], #4
+    str     r1, [r2], #4
      bx      lr
  
  .data
  filter:
-    .ds     8
+    .ds     4                          @ alpha_q12
+    .ds     8                          @ filter history for left channel
+    .ds     8                          @ filter history for right channel
  
  @ vim:filetype=armasm
diff --git a/pico/sound/sound.c b/pico/sound/sound.c

index 678330a..ad74868 100644 (file)
--- a/pico/sound/sound.c
+++ b/pico/sound/sound.c
@@ -26,73 +26,6 @@ short cdda_out_buffer[2*1152];
  // sn76496\r
  extern int *sn76496_regs;\r
  \r
-// Low pass filter 'previous' samples\r
-static int32_t lpf_lp;\r
-static int32_t lpf_rp;\r
-\r
-static void low_pass_filter_stereo(int *buf32, int length)\r
-{\r
-  int samples = length;\r
-  int *out32 = buf32;\r
-  // Restore previous samples\r
-  int32_t lpf_l = lpf_lp;\r
-  int32_t lpf_r = lpf_rp;\r
-\r
-  // Single-pole low-pass filter (6 dB/octave)\r
-  int32_t factor_a = PicoIn.sndFilterRange;\r
-  int32_t factor_b = 0x10000 - factor_a;\r
-\r
-  do\r
-  {\r
-    // Apply low-pass filter\r
-    lpf_l = (lpf_l * factor_a) + (out32[0] * factor_b);\r
-    lpf_r = (lpf_r * factor_a) + (out32[1] * factor_b);\r
-\r
-    // 16.16 fixed point\r
-    lpf_l >>= 16;\r
-    lpf_r >>= 16;\r
-\r
-    // Update sound buffer\r
-    *out32++ = lpf_l;\r
-    *out32++ = lpf_r;\r
-  }\r
-  while (--samples);\r
-\r
-  // Save last samples for next frame\r
-  lpf_lp = lpf_l;\r
-  lpf_rp = lpf_r;\r
-}\r
-\r
-static void low_pass_filter_mono(int *buf32, int length)\r
-{\r
-  int samples = length;\r
-  int *out32 = buf32;\r
-  // Restore previous sample\r
-  int32_t lpf_l = lpf_lp;\r
-\r
-  // Single-pole low-pass filter (6 dB/octave)\r
-  int32_t factor_a = PicoIn.sndFilterRange;\r
-  int32_t factor_b = 0x10000 - factor_a;\r
-\r
-  do\r
-  {\r
-    // Apply low-pass filter\r
-    lpf_l = (lpf_l * factor_a) + (out32[0] * factor_b);\r
-\r
-    // 16.16 fixed point\r
-    lpf_l >>= 16;\r
-\r
-    // Update sound buffer\r
-    *out32++ = lpf_l;\r
-  }\r
-  while (--samples);\r
-\r
-  // Save last sample for next frame\r
-  lpf_lp = lpf_l;\r
-}\r
-\r
-void (*low_pass_filter)(int *buf32, int length) = low_pass_filter_stereo;\r
-\r
  // ym2413\r
  #define YM2413_CLK 3579545\r
  OPLL old_opll;\r
@@ -119,11 +52,7 @@ PICO_INTERNAL void PsndReset(void)
    PsndRerate(0);\r
    timers_reset();\r
  \r
-  // Reset low pass filter\r
-  lpf_lp = 0;\r
-  lpf_rp = 0;\r
-\r
-  mix_reset();\r
+  mix_reset(PicoIn.sndFilter ? PicoIn.sndFilterRange : 0);\r
  }\r
  \r
  \r
@@ -179,9 +108,6 @@ void PsndRerate(int preserve_state)
    // set mixer\r
    PsndMix_32_to_16l = (PicoIn.opt & POPT_EN_STEREO) ? mix_32_to_16l_stereo : mix_32_to_16_mono;\r
  \r
-  // set low pass filter\r
-  low_pass_filter = (PicoIn.opt & POPT_EN_STEREO) ? low_pass_filter_stereo : low_pass_filter_mono;\r
-\r
    if (PicoIn.AHW & PAHW_PICO)\r
      PicoReratePico();\r
  }\r
@@ -463,11 +389,6 @@ static int PsndRender(int offset, int length)
    if ((PicoIn.AHW & PAHW_32X) && (PicoIn.opt & POPT_EN_PWM))\r
      p32x_pwm_update(buf32, length-offset, stereo);\r
  \r
-  // Apply low pass filter, if required\r
-  if (PicoIn.sndFilter == 1) {\r
-    low_pass_filter(buf32, length);\r
-  }\r
-\r
    // convert + limit to normal 16bit output\r
    PsndMix_32_to_16l(PicoIn.sndOut+(offset<<stereo), buf32, length-offset);\r
  \r
diff --git a/platform/libretro/libretro.c b/platform/libretro/libretro.c

index 6de1034..dd3b90e 100644 (file)
--- a/platform/libretro/libretro.c
+++ b/platform/libretro/libretro.c
@@ -70,6 +70,7 @@ int _newlib_vm_size_user = 1 << TARGET_SIZE_2;
  #include <pico/pico_int.h>
  #include <pico/state.h>
  #include <pico/patch.h>
+#include <pico/sound/mix.h>
  #include "../common/input_pico.h"
  #include "../common/version.h"
  #include <libretro.h>
@@ -1434,6 +1435,8 @@ static void update_variables(bool first_run)
     unsigned old_frameskip_type;
     int old_vout_format;
     double new_sound_rate;
+   unsigned short old_snd_filter;
+   int32_t old_snd_filter_range;
  
     var.value = NULL;
     var.key = "picodrive_input1";
@@ -1539,6 +1542,7 @@ static void update_variables(bool first_run)
        PicoIn.opt &= ~POPT_EN_DRC;
  #endif
  
+   old_snd_filter = PicoIn.sndFilter;
     var.value = NULL;
     var.key = "picodrive_audio_filter";
     PicoIn.sndFilter = 0;
@@ -1547,6 +1551,7 @@ static void update_variables(bool first_run)
           PicoIn.sndFilter = 1;
     }
  
+   old_snd_filter_range = PicoIn.sndFilterRange;
     var.value = NULL;
     var.key = "picodrive_lowpass_range";
     PicoIn.sndFilterRange = (60 * 65536) / 100;
@@ -1554,6 +1559,10 @@ static void update_variables(bool first_run)
        PicoIn.sndFilterRange = (atoi(var.value) * 65536) / 100;
     }
  
+   if (old_snd_filter != PicoIn.sndFilter || old_snd_filter_range != PicoIn.sndFilterRange) {
+      mix_reset(PicoIn.sndFilter ? PicoIn.sndFilterRange : 0);
+   }
+
     old_frameskip_type = frameskip_type;
     frameskip_type     = 0;
     var.key            = "picodrive_frameskip";
author	kub <derkub@gmail.com>
	Mon, 21 Dec 2020 22:22:00 +0000 (23:22 +0100)
committer	kub <derkub@gmail.com>
	Wed, 23 Dec 2020 14:51:49 +0000 (15:51 +0100)
pico/sound/mix.c		patch \| blob \| blame \| history
pico/sound/mix.h		patch \| blob \| blame \| history
pico/sound/mix_arm.S		patch \| blob \| blame \| history
pico/sound/sound.c		patch \| blob \| blame \| history
platform/libretro/libretro.c		patch \| blob \| blame \| history